xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5e49412c8a17aeaca39d74026da899260f5f63d4)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*ii = a->i,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !ii) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (diag[i] >= ii[i+1]) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatDestroy_Redundant(&A->redundant);CHKERRQ(ierr);
1200   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1202   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1203   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1204   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1205   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1208   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1209   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1210   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1211   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1212   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1213 
1214   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1215   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1216   ierr = PetscFree(A->data);CHKERRQ(ierr);
1217 
1218   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235 {
1236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_ROW_ORIENTED:
1242     a->roworiented = flg;
1243     break;
1244   case MAT_KEEP_NONZERO_PATTERN:
1245     a->keepnonzeropattern = flg;
1246     break;
1247   case MAT_NEW_NONZERO_LOCATIONS:
1248     a->nonew = (flg ? 0 : 1);
1249     break;
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     a->nonew = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254     a->nonew = (flg ? -2 : 0);
1255     break;
1256   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257     a->nounused = (flg ? -1 : 0);
1258     break;
1259   case MAT_NEW_DIAGONALS:
1260   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261   case MAT_USE_HASH_TABLE:
1262     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1263     break;
1264   case MAT_SPD:
1265   case MAT_SYMMETRIC:
1266   case MAT_STRUCTURALLY_SYMMETRIC:
1267   case MAT_HERMITIAN:
1268   case MAT_SYMMETRY_ETERNAL:
1269     /* These options are handled directly by MatSetOption() */
1270     break;
1271   default:
1272     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1280 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1281 {
1282   PetscErrorCode ierr;
1283   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1284   MatScalar      *aa_i;
1285   PetscScalar    *v_i;
1286 
1287   PetscFunctionBegin;
1288   bs  = A->rmap->bs;
1289   bs2 = bs*bs;
1290   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1291 
1292   bn  = row/bs;   /* Block number */
1293   bp  = row % bs; /* Block Position */
1294   M   = ai[bn+1] - ai[bn];
1295   *nz = bs*M;
1296 
1297   if (v) {
1298     *v = 0;
1299     if (*nz) {
1300       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1301       for (i=0; i<M; i++) { /* for each block in the block row */
1302         v_i  = *v + i*bs;
1303         aa_i = aa + bs2*(ai[bn] + i);
1304         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1305       }
1306     }
1307   }
1308 
1309   if (idx) {
1310     *idx = 0;
1311     if (*nz) {
1312       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1313       for (i=0; i<M; i++) { /* for each block in the block row */
1314         idx_i = *idx + i*bs;
1315         itmp  = bs*aj[ai[bn] + i];
1316         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1317       }
1318     }
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1325 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326 {
1327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1337 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338 {
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1343   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1351 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1352 {
1353   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1354   Mat            C;
1355   PetscErrorCode ierr;
1356   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1357   PetscInt       *rows,*cols,bs2=a->bs2;
1358   MatScalar      *array;
1359 
1360   PetscFunctionBegin;
1361   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1362   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1363     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1364 
1365     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1366     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1367     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1368     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1369     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1370     ierr = PetscFree(col);CHKERRQ(ierr);
1371   } else {
1372     C = *B;
1373   }
1374 
1375   array = a->a;
1376   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1377   for (i=0; i<mbs; i++) {
1378     cols[0] = i*bs;
1379     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1380     len = ai[i+1] - ai[i];
1381     for (j=0; j<len; j++) {
1382       rows[0] = (*aj++)*bs;
1383       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1384       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1385       array += bs2;
1386     }
1387   }
1388   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1389 
1390   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392 
1393   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1394     *B = C;
1395   } else {
1396     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1418 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1419 {
1420   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1423   int            fd;
1424   PetscScalar    *aa;
1425   FILE           *file;
1426 
1427   PetscFunctionBegin;
1428   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1429   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1430   col_lens[0] = MAT_FILE_CLASSID;
1431 
1432   col_lens[1] = A->rmap->N;
1433   col_lens[2] = A->cmap->n;
1434   col_lens[3] = a->nz*bs2;
1435 
1436   /* store lengths of each row and write (including header) to file */
1437   count = 0;
1438   for (i=0; i<a->mbs; i++) {
1439     for (j=0; j<bs; j++) {
1440       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1441     }
1442   }
1443   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1444   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1445 
1446   /* store column indices (zero start index) */
1447   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1448   count = 0;
1449   for (i=0; i<a->mbs; i++) {
1450     for (j=0; j<bs; j++) {
1451       for (k=a->i[i]; k<a->i[i+1]; k++) {
1452         for (l=0; l<bs; l++) {
1453           jj[count++] = bs*a->j[k] + l;
1454         }
1455       }
1456     }
1457   }
1458   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1459   ierr = PetscFree(jj);CHKERRQ(ierr);
1460 
1461   /* store nonzero values */
1462   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1463   count = 0;
1464   for (i=0; i<a->mbs; i++) {
1465     for (j=0; j<bs; j++) {
1466       for (k=a->i[i]; k<a->i[i+1]; k++) {
1467         for (l=0; l<bs; l++) {
1468           aa[count++] = a->a[bs2*k + l*bs + j];
1469         }
1470       }
1471     }
1472   }
1473   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1474   ierr = PetscFree(aa);CHKERRQ(ierr);
1475 
1476   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1477   if (file) {
1478     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1485 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1486 {
1487   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1488   PetscErrorCode    ierr;
1489   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1490   PetscViewerFormat format;
1491 
1492   PetscFunctionBegin;
1493   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1494   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1495     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1496   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1497     Mat aij;
1498     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1499     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1500     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1501   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1502       PetscFunctionReturn(0);
1503   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1504     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1505     for (i=0; i<a->mbs; i++) {
1506       for (j=0; j<bs; j++) {
1507         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1508         for (k=a->i[i]; k<a->i[i+1]; k++) {
1509           for (l=0; l<bs; l++) {
1510 #if defined(PETSC_USE_COMPLEX)
1511             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1512               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1513                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1514             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1515               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1516                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1517             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             }
1520 #else
1521             if (a->a[bs2*k + l*bs + j] != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1523             }
1524 #endif
1525           }
1526         }
1527         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1528       }
1529     }
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1531   } else {
1532     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1533     for (i=0; i<a->mbs; i++) {
1534       for (j=0; j<bs; j++) {
1535         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1536         for (k=a->i[i]; k<a->i[i+1]; k++) {
1537           for (l=0; l<bs; l++) {
1538 #if defined(PETSC_USE_COMPLEX)
1539             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1541                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1544                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             } else {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             }
1548 #else
1549             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1550 #endif
1551           }
1552         }
1553         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1554       }
1555     }
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1557   }
1558   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #include <petscdraw.h>
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1565 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1566 {
1567   Mat               A = (Mat) Aa;
1568   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1569   PetscErrorCode    ierr;
1570   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1571   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1572   MatScalar         *aa;
1573   PetscViewer       viewer;
1574   PetscViewerFormat format;
1575 
1576   PetscFunctionBegin;
1577   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1578   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1579 
1580   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1581 
1582   /* loop over matrix elements drawing boxes */
1583 
1584   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1585     color = PETSC_DRAW_BLUE;
1586     for (i=0,row=0; i<mbs; i++,row+=bs) {
1587       for (j=a->i[i]; j<a->i[i+1]; j++) {
1588         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1589         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1590         aa  = a->a + j*bs2;
1591         for (k=0; k<bs; k++) {
1592           for (l=0; l<bs; l++) {
1593             if (PetscRealPart(*aa++) >=  0.) continue;
1594             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1595           }
1596         }
1597       }
1598     }
1599     color = PETSC_DRAW_CYAN;
1600     for (i=0,row=0; i<mbs; i++,row+=bs) {
1601       for (j=a->i[i]; j<a->i[i+1]; j++) {
1602         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1603         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1604         aa  = a->a + j*bs2;
1605         for (k=0; k<bs; k++) {
1606           for (l=0; l<bs; l++) {
1607             if (PetscRealPart(*aa++) != 0.) continue;
1608             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1609           }
1610         }
1611       }
1612     }
1613     color = PETSC_DRAW_RED;
1614     for (i=0,row=0; i<mbs; i++,row+=bs) {
1615       for (j=a->i[i]; j<a->i[i+1]; j++) {
1616         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1617         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618         aa  = a->a + j*bs2;
1619         for (k=0; k<bs; k++) {
1620           for (l=0; l<bs; l++) {
1621             if (PetscRealPart(*aa++) <= 0.) continue;
1622             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1623           }
1624         }
1625       }
1626     }
1627   } else {
1628     /* use contour shading to indicate magnitude of values */
1629     /* first determine max of all nonzero values */
1630     PetscDraw popup;
1631     PetscReal scale,maxv = 0.0;
1632 
1633     for (i=0; i<a->nz*a->bs2; i++) {
1634       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1635     }
1636     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1637     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1638     if (popup) {
1639       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1640     }
1641     for (i=0,row=0; i<mbs; i++,row+=bs) {
1642       for (j=a->i[i]; j<a->i[i+1]; j++) {
1643         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1644         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1645         aa  = a->a + j*bs2;
1646         for (k=0; k<bs; k++) {
1647           for (l=0; l<bs; l++) {
1648             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1649             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1650           }
1651         }
1652       }
1653     }
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1660 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1661 {
1662   PetscErrorCode ierr;
1663   PetscReal      xl,yl,xr,yr,w,h;
1664   PetscDraw      draw;
1665   PetscBool      isnull;
1666 
1667   PetscFunctionBegin;
1668   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1669   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1670 
1671   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1672   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1673   xr  += w;    yr += h;  xl = -w;     yl = -h;
1674   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1675   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1676   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatView_SeqBAIJ"
1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1683 {
1684   PetscErrorCode ierr;
1685   PetscBool      iascii,isbinary,isdraw;
1686 
1687   PetscFunctionBegin;
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1689   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1690   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1691   if (iascii) {
1692     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1693   } else if (isbinary) {
1694     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1695   } else if (isdraw) {
1696     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1697   } else {
1698     Mat B;
1699     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1700     ierr = MatView(B,viewer);CHKERRQ(ierr);
1701     ierr = MatDestroy(&B);CHKERRQ(ierr);
1702   }
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1709 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1710 {
1711   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1712   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1713   PetscInt    *ai = a->i,*ailen = a->ilen;
1714   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1715   MatScalar   *ap,*aa = a->a;
1716 
1717   PetscFunctionBegin;
1718   for (k=0; k<m; k++) { /* loop over rows */
1719     row = im[k]; brow = row/bs;
1720     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1721     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1722     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1723     nrow = ailen[brow];
1724     for (l=0; l<n; l++) { /* loop over columns */
1725       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1726       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1727       col  = in[l];
1728       bcol = col/bs;
1729       cidx = col%bs;
1730       ridx = row%bs;
1731       high = nrow;
1732       low  = 0; /* assume unsorted */
1733       while (high-low > 5) {
1734         t = (low+high)/2;
1735         if (rp[t] > bcol) high = t;
1736         else             low  = t;
1737       }
1738       for (i=low; i<high; i++) {
1739         if (rp[i] > bcol) break;
1740         if (rp[i] == bcol) {
1741           *v++ = ap[bs2*i+bs*cidx+ridx];
1742           goto finished;
1743         }
1744       }
1745       *v++ = 0.0;
1746 finished:;
1747     }
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1754 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1755 {
1756   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1757   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1758   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1759   PetscErrorCode    ierr;
1760   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1761   PetscBool         roworiented=a->roworiented;
1762   const PetscScalar *value     = v;
1763   MatScalar         *ap,*aa = a->a,*bap;
1764 
1765   PetscFunctionBegin;
1766   if (roworiented) {
1767     stepval = (n-1)*bs;
1768   } else {
1769     stepval = (m-1)*bs;
1770   }
1771   for (k=0; k<m; k++) { /* loop over added rows */
1772     row = im[k];
1773     if (row < 0) continue;
1774 #if defined(PETSC_USE_DEBUG)
1775     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1776 #endif
1777     rp   = aj + ai[row];
1778     ap   = aa + bs2*ai[row];
1779     rmax = imax[row];
1780     nrow = ailen[row];
1781     low  = 0;
1782     high = nrow;
1783     for (l=0; l<n; l++) { /* loop over added columns */
1784       if (in[l] < 0) continue;
1785 #if defined(PETSC_USE_DEBUG)
1786       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1787 #endif
1788       col = in[l];
1789       if (roworiented) {
1790         value = v + (k*(stepval+bs) + l)*bs;
1791       } else {
1792         value = v + (l*(stepval+bs) + k)*bs;
1793       }
1794       if (col <= lastcol) low = 0;
1795       else high = nrow;
1796       lastcol = col;
1797       while (high-low > 7) {
1798         t = (low+high)/2;
1799         if (rp[t] > col) high = t;
1800         else             low  = t;
1801       }
1802       for (i=low; i<high; i++) {
1803         if (rp[i] > col) break;
1804         if (rp[i] == col) {
1805           bap = ap +  bs2*i;
1806           if (roworiented) {
1807             if (is == ADD_VALUES) {
1808               for (ii=0; ii<bs; ii++,value+=stepval) {
1809                 for (jj=ii; jj<bs2; jj+=bs) {
1810                   bap[jj] += *value++;
1811                 }
1812               }
1813             } else {
1814               for (ii=0; ii<bs; ii++,value+=stepval) {
1815                 for (jj=ii; jj<bs2; jj+=bs) {
1816                   bap[jj] = *value++;
1817                 }
1818               }
1819             }
1820           } else {
1821             if (is == ADD_VALUES) {
1822               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1823                 for (jj=0; jj<bs; jj++) {
1824                   bap[jj] += value[jj];
1825                 }
1826                 bap += bs;
1827               }
1828             } else {
1829               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1830                 for (jj=0; jj<bs; jj++) {
1831                   bap[jj]  = value[jj];
1832                 }
1833                 bap += bs;
1834               }
1835             }
1836           }
1837           goto noinsert2;
1838         }
1839       }
1840       if (nonew == 1) goto noinsert2;
1841       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1842       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1843       N = nrow++ - 1; high++;
1844       /* shift up all the later entries in this row */
1845       for (ii=N; ii>=i; ii--) {
1846         rp[ii+1] = rp[ii];
1847         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1848       }
1849       if (N >= i) {
1850         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1851       }
1852       rp[i] = col;
1853       bap   = ap +  bs2*i;
1854       if (roworiented) {
1855         for (ii=0; ii<bs; ii++,value+=stepval) {
1856           for (jj=ii; jj<bs2; jj+=bs) {
1857             bap[jj] = *value++;
1858           }
1859         }
1860       } else {
1861         for (ii=0; ii<bs; ii++,value+=stepval) {
1862           for (jj=0; jj<bs; jj++) {
1863             *bap++ = *value++;
1864           }
1865         }
1866       }
1867 noinsert2:;
1868       low = i;
1869     }
1870     ailen[row] = nrow;
1871   }
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1877 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1878 {
1879   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1880   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1881   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1882   PetscErrorCode ierr;
1883   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1884   MatScalar      *aa  = a->a,*ap;
1885   PetscReal      ratio=0.6;
1886 
1887   PetscFunctionBegin;
1888   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1889 
1890   if (m) rmax = ailen[0];
1891   for (i=1; i<mbs; i++) {
1892     /* move each row back by the amount of empty slots (fshift) before it*/
1893     fshift += imax[i-1] - ailen[i-1];
1894     rmax    = PetscMax(rmax,ailen[i]);
1895     if (fshift) {
1896       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1897       N  = ailen[i];
1898       for (j=0; j<N; j++) {
1899         ip[j-fshift] = ip[j];
1900 
1901         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1902       }
1903     }
1904     ai[i] = ai[i-1] + ailen[i-1];
1905   }
1906   if (mbs) {
1907     fshift += imax[mbs-1] - ailen[mbs-1];
1908     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1909   }
1910 
1911   /* reset ilen and imax for each row */
1912   a->nonzerorowcnt = 0;
1913   for (i=0; i<mbs; i++) {
1914     ailen[i] = imax[i] = ai[i+1] - ai[i];
1915     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1916   }
1917   a->nz = ai[mbs];
1918 
1919   /* diagonals may have moved, so kill the diagonal pointers */
1920   a->idiagvalid = PETSC_FALSE;
1921   if (fshift && a->diag) {
1922     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1923     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1924     a->diag = 0;
1925   }
1926   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1927   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1928   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1929   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1930 
1931   A->info.mallocs    += a->reallocs;
1932   a->reallocs         = 0;
1933   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1934   a->rmax             = rmax;
1935 
1936   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 /*
1941    This function returns an array of flags which indicate the locations of contiguous
1942    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1943    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1944    Assume: sizes should be long enough to hold all the values.
1945 */
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1948 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1949 {
1950   PetscInt  i,j,k,row;
1951   PetscBool flg;
1952 
1953   PetscFunctionBegin;
1954   for (i=0,j=0; i<n; j++) {
1955     row = idx[i];
1956     if (row%bs!=0) { /* Not the begining of a block */
1957       sizes[j] = 1;
1958       i++;
1959     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1960       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1961       i++;
1962     } else { /* Begining of the block, so check if the complete block exists */
1963       flg = PETSC_TRUE;
1964       for (k=1; k<bs; k++) {
1965         if (row+k != idx[i+k]) { /* break in the block */
1966           flg = PETSC_FALSE;
1967           break;
1968         }
1969       }
1970       if (flg) { /* No break in the bs */
1971         sizes[j] = bs;
1972         i       += bs;
1973       } else {
1974         sizes[j] = 1;
1975         i++;
1976       }
1977     }
1978   }
1979   *bs_max = j;
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1985 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1986 {
1987   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1988   PetscErrorCode    ierr;
1989   PetscInt          i,j,k,count,*rows;
1990   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1991   PetscScalar       zero = 0.0;
1992   MatScalar         *aa;
1993   const PetscScalar *xx;
1994   PetscScalar       *bb;
1995 
1996   PetscFunctionBegin;
1997   /* fix right hand side if needed */
1998   if (x && b) {
1999     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2000     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2001     for (i=0; i<is_n; i++) {
2002       bb[is_idx[i]] = diag*xx[is_idx[i]];
2003     }
2004     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2005     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2006   }
2007 
2008   /* Make a copy of the IS and  sort it */
2009   /* allocate memory for rows,sizes */
2010   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2011 
2012   /* copy IS values to rows, and sort them */
2013   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2014   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2015 
2016   if (baij->keepnonzeropattern) {
2017     for (i=0; i<is_n; i++) sizes[i] = 1;
2018     bs_max          = is_n;
2019   } else {
2020     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2021     A->nonzerostate++;
2022   }
2023 
2024   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2025     row = rows[j];
2026     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2027     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2028     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2029     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2030       if (diag != (PetscScalar)0.0) {
2031         if (baij->ilen[row/bs] > 0) {
2032           baij->ilen[row/bs]       = 1;
2033           baij->j[baij->i[row/bs]] = row/bs;
2034 
2035           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2036         }
2037         /* Now insert all the diagonal values for this bs */
2038         for (k=0; k<bs; k++) {
2039           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2040         }
2041       } else { /* (diag == 0.0) */
2042         baij->ilen[row/bs] = 0;
2043       } /* end (diag == 0.0) */
2044     } else { /* (sizes[i] != bs) */
2045 #if defined(PETSC_USE_DEBUG)
2046       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2047 #endif
2048       for (k=0; k<count; k++) {
2049         aa[0] =  zero;
2050         aa   += bs;
2051       }
2052       if (diag != (PetscScalar)0.0) {
2053         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2054       }
2055     }
2056   }
2057 
2058   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2059   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2065 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2066 {
2067   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2068   PetscErrorCode    ierr;
2069   PetscInt          i,j,k,count;
2070   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2071   PetscScalar       zero = 0.0;
2072   MatScalar         *aa;
2073   const PetscScalar *xx;
2074   PetscScalar       *bb;
2075   PetscBool         *zeroed,vecs = PETSC_FALSE;
2076 
2077   PetscFunctionBegin;
2078   /* fix right hand side if needed */
2079   if (x && b) {
2080     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2082     vecs = PETSC_TRUE;
2083   }
2084 
2085   /* zero the columns */
2086   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2087   for (i=0; i<is_n; i++) {
2088     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2089     zeroed[is_idx[i]] = PETSC_TRUE;
2090   }
2091   for (i=0; i<A->rmap->N; i++) {
2092     if (!zeroed[i]) {
2093       row = i/bs;
2094       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2095         for (k=0; k<bs; k++) {
2096           col = bs*baij->j[j] + k;
2097           if (zeroed[col]) {
2098             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2099             if (vecs) bb[i] -= aa[0]*xx[col];
2100             aa[0] = 0.0;
2101           }
2102         }
2103       }
2104     } else if (vecs) bb[i] = diag*xx[i];
2105   }
2106   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2107   if (vecs) {
2108     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2109     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2110   }
2111 
2112   /* zero the rows */
2113   for (i=0; i<is_n; i++) {
2114     row   = is_idx[i];
2115     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2116     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2117     for (k=0; k<count; k++) {
2118       aa[0] =  zero;
2119       aa   += bs;
2120     }
2121     if (diag != (PetscScalar)0.0) {
2122       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2123     }
2124   }
2125   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2131 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2132 {
2133   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2134   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2135   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2136   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2137   PetscErrorCode ierr;
2138   PetscInt       ridx,cidx,bs2=a->bs2;
2139   PetscBool      roworiented=a->roworiented;
2140   MatScalar      *ap,value,*aa=a->a,*bap;
2141 
2142   PetscFunctionBegin;
2143   for (k=0; k<m; k++) { /* loop over added rows */
2144     row  = im[k];
2145     brow = row/bs;
2146     if (row < 0) continue;
2147 #if defined(PETSC_USE_DEBUG)
2148     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2149 #endif
2150     rp   = aj + ai[brow];
2151     ap   = aa + bs2*ai[brow];
2152     rmax = imax[brow];
2153     nrow = ailen[brow];
2154     low  = 0;
2155     high = nrow;
2156     for (l=0; l<n; l++) { /* loop over added columns */
2157       if (in[l] < 0) continue;
2158 #if defined(PETSC_USE_DEBUG)
2159       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2160 #endif
2161       col  = in[l]; bcol = col/bs;
2162       ridx = row % bs; cidx = col % bs;
2163       if (roworiented) {
2164         value = v[l + k*n];
2165       } else {
2166         value = v[k + l*m];
2167       }
2168       if (col <= lastcol) low = 0; else high = nrow;
2169       lastcol = col;
2170       while (high-low > 7) {
2171         t = (low+high)/2;
2172         if (rp[t] > bcol) high = t;
2173         else              low  = t;
2174       }
2175       for (i=low; i<high; i++) {
2176         if (rp[i] > bcol) break;
2177         if (rp[i] == bcol) {
2178           bap = ap +  bs2*i + bs*cidx + ridx;
2179           if (is == ADD_VALUES) *bap += value;
2180           else                  *bap  = value;
2181           goto noinsert1;
2182         }
2183       }
2184       if (nonew == 1) goto noinsert1;
2185       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2186       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2187       N = nrow++ - 1; high++;
2188       /* shift up all the later entries in this row */
2189       for (ii=N; ii>=i; ii--) {
2190         rp[ii+1] = rp[ii];
2191         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2192       }
2193       if (N>=i) {
2194         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2195       }
2196       rp[i]                      = bcol;
2197       ap[bs2*i + bs*cidx + ridx] = value;
2198       a->nz++;
2199       A->nonzerostate++;
2200 noinsert1:;
2201       low = i;
2202     }
2203     ailen[brow] = nrow;
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2211 {
2212   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2213   Mat            outA;
2214   PetscErrorCode ierr;
2215   PetscBool      row_identity,col_identity;
2216 
2217   PetscFunctionBegin;
2218   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2219   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2220   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2221   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2222 
2223   outA            = inA;
2224   inA->factortype = MAT_FACTOR_LU;
2225 
2226   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2227 
2228   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2229   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2230   a->row = row;
2231   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2232   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2233   a->col = col;
2234 
2235   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2236   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2237   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2238   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2239 
2240   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2241   if (!a->solve_work) {
2242     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2243     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2244   }
2245   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2251 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2252 {
2253   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2254   PetscInt    i,nz,mbs;
2255 
2256   PetscFunctionBegin;
2257   nz  = baij->maxnz;
2258   mbs = baij->mbs;
2259   for (i=0; i<nz; i++) {
2260     baij->j[i] = indices[i];
2261   }
2262   baij->nz = nz;
2263   for (i=0; i<mbs; i++) {
2264     baij->ilen[i] = baij->imax[i];
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2271 /*@
2272     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2273        in the matrix.
2274 
2275   Input Parameters:
2276 +  mat - the SeqBAIJ matrix
2277 -  indices - the column indices
2278 
2279   Level: advanced
2280 
2281   Notes:
2282     This can be called if you have precomputed the nonzero structure of the
2283   matrix and want to provide it to the matrix object to improve the performance
2284   of the MatSetValues() operation.
2285 
2286     You MUST have set the correct numbers of nonzeros per row in the call to
2287   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2288 
2289     MUST be called before any calls to MatSetValues();
2290 
2291 @*/
2292 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2293 {
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2298   PetscValidPointer(indices,2);
2299   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2305 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2306 {
2307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2310   PetscReal      atmp;
2311   PetscScalar    *x,zero = 0.0;
2312   MatScalar      *aa;
2313   PetscInt       ncols,brow,krow,kcol;
2314 
2315   PetscFunctionBegin;
2316   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2317   bs  = A->rmap->bs;
2318   aa  = a->a;
2319   ai  = a->i;
2320   aj  = a->j;
2321   mbs = a->mbs;
2322 
2323   ierr = VecSet(v,zero);CHKERRQ(ierr);
2324   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2325   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2326   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2327   for (i=0; i<mbs; i++) {
2328     ncols = ai[1] - ai[0]; ai++;
2329     brow  = bs*i;
2330     for (j=0; j<ncols; j++) {
2331       for (kcol=0; kcol<bs; kcol++) {
2332         for (krow=0; krow<bs; krow++) {
2333           atmp = PetscAbsScalar(*aa);aa++;
2334           row  = brow + krow;   /* row index */
2335           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2336         }
2337       }
2338       aj++;
2339     }
2340   }
2341   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatCopy_SeqBAIJ"
2347 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2348 {
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   /* If the two matrices have the same copy implementation, use fast copy. */
2353   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2354     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2355     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2356     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2357 
2358     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2359     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2360     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2361   } else {
2362     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2363   }
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2369 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2370 {
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2380 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2381 {
2382   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2383 
2384   PetscFunctionBegin;
2385   *array = a->a;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2391 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2392 {
2393   PetscFunctionBegin;
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2399 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2400 {
2401   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2402   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2403   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2404   PetscErrorCode ierr;
2405 
2406   PetscFunctionBegin;
2407   /* Set the number of nonzeros in the new matrix */
2408   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2414 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2415 {
2416   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2417   PetscErrorCode ierr;
2418   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2419   PetscBLASInt   one=1;
2420 
2421   PetscFunctionBegin;
2422   if (str == SAME_NONZERO_PATTERN) {
2423     PetscScalar  alpha = a;
2424     PetscBLASInt bnz;
2425     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2426     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2427     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2428   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2429     if (y->xtoy && y->XtoY != X) {
2430       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2431       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2432     }
2433     if (!y->xtoy) { /* get xtoy */
2434       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2435       y->XtoY = X;
2436       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2437     }
2438     for (i=0; i<x->nz; i++) {
2439       j = 0;
2440       while (j < bs2) {
2441         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2442         j++;
2443       }
2444     }
2445     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2446     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2447   } else {
2448     Mat      B;
2449     PetscInt *nnz;
2450     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2451     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2452     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2453     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2454     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2455     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2456     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2457     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2458     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2459     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2460     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2461     ierr = PetscFree(nnz);CHKERRQ(ierr);
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2468 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2469 {
2470   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2471   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2472   MatScalar   *aa = a->a;
2473 
2474   PetscFunctionBegin;
2475   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2482 {
2483   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2484   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2485   MatScalar   *aa = a->a;
2486 
2487   PetscFunctionBegin;
2488   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2494 /*
2495     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2496 */
2497 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2498 {
2499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2500   PetscErrorCode ierr;
2501   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2502   PetscInt       nz = a->i[m],row,*jj,mr,col;
2503 
2504   PetscFunctionBegin;
2505   *nn = n;
2506   if (!ia) PetscFunctionReturn(0);
2507   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2508   else {
2509     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2510     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2511     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2512     jj   = a->j;
2513     for (i=0; i<nz; i++) {
2514       collengths[jj[i]]++;
2515     }
2516     cia[0] = oshift;
2517     for (i=0; i<n; i++) {
2518       cia[i+1] = cia[i] + collengths[i];
2519     }
2520     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2521     jj   = a->j;
2522     for (row=0; row<m; row++) {
2523       mr = a->i[row+1] - a->i[row];
2524       for (i=0; i<mr; i++) {
2525         col = *jj++;
2526 
2527         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2528       }
2529     }
2530     ierr = PetscFree(collengths);CHKERRQ(ierr);
2531     *ia  = cia; *ja = cja;
2532   }
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2538 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2539 {
2540   PetscErrorCode ierr;
2541 
2542   PetscFunctionBegin;
2543   if (!ia) PetscFunctionReturn(0);
2544   ierr = PetscFree(*ia);CHKERRQ(ierr);
2545   ierr = PetscFree(*ja);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 /*
2550  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2551  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2552  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2553  */
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2557 {
2558   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2559   PetscErrorCode ierr;
2560   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2561   PetscInt       nz = a->i[m],row,*jj,mr,col;
2562   PetscInt       *cspidx;
2563 
2564   PetscFunctionBegin;
2565   *nn = n;
2566   if (!ia) PetscFunctionReturn(0);
2567 
2568   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2569   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2570   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2571   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2572   jj   = a->j;
2573   for (i=0; i<nz; i++) {
2574     collengths[jj[i]]++;
2575   }
2576   cia[0] = oshift;
2577   for (i=0; i<n; i++) {
2578     cia[i+1] = cia[i] + collengths[i];
2579   }
2580   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2581   jj   = a->j;
2582   for (row=0; row<m; row++) {
2583     mr = a->i[row+1] - a->i[row];
2584     for (i=0; i<mr; i++) {
2585       col = *jj++;
2586       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2587       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2588     }
2589   }
2590   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2591   *ia    = cia; *ja = cja;
2592   *spidx = cspidx;
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2598 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2599 {
2600   PetscErrorCode ierr;
2601 
2602   PetscFunctionBegin;
2603   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2604   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 /* -------------------------------------------------------------------*/
2609 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2610                                        MatGetRow_SeqBAIJ,
2611                                        MatRestoreRow_SeqBAIJ,
2612                                        MatMult_SeqBAIJ_N,
2613                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2614                                        MatMultTranspose_SeqBAIJ,
2615                                        MatMultTransposeAdd_SeqBAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                /* 10*/ 0,
2620                                        MatLUFactor_SeqBAIJ,
2621                                        0,
2622                                        0,
2623                                        MatTranspose_SeqBAIJ,
2624                                /* 15*/ MatGetInfo_SeqBAIJ,
2625                                        MatEqual_SeqBAIJ,
2626                                        MatGetDiagonal_SeqBAIJ,
2627                                        MatDiagonalScale_SeqBAIJ,
2628                                        MatNorm_SeqBAIJ,
2629                                /* 20*/ 0,
2630                                        MatAssemblyEnd_SeqBAIJ,
2631                                        MatSetOption_SeqBAIJ,
2632                                        MatZeroEntries_SeqBAIJ,
2633                                /* 24*/ MatZeroRows_SeqBAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                /* 29*/ MatSetUp_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                /* 34*/ MatDuplicate_SeqBAIJ,
2644                                        0,
2645                                        0,
2646                                        MatILUFactor_SeqBAIJ,
2647                                        0,
2648                                /* 39*/ MatAXPY_SeqBAIJ,
2649                                        MatGetSubMatrices_SeqBAIJ,
2650                                        MatIncreaseOverlap_SeqBAIJ,
2651                                        MatGetValues_SeqBAIJ,
2652                                        MatCopy_SeqBAIJ,
2653                                /* 44*/ 0,
2654                                        MatScale_SeqBAIJ,
2655                                        0,
2656                                        0,
2657                                        MatZeroRowsColumns_SeqBAIJ,
2658                                /* 49*/ 0,
2659                                        MatGetRowIJ_SeqBAIJ,
2660                                        MatRestoreRowIJ_SeqBAIJ,
2661                                        MatGetColumnIJ_SeqBAIJ,
2662                                        MatRestoreColumnIJ_SeqBAIJ,
2663                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        MatSetValuesBlocked_SeqBAIJ,
2668                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2669                                        MatDestroy_SeqBAIJ,
2670                                        MatView_SeqBAIJ,
2671                                        0,
2672                                        0,
2673                                /* 64*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2679                                        0,
2680                                        MatConvert_Basic,
2681                                        0,
2682                                        0,
2683                                /* 74*/ 0,
2684                                        MatFDColoringApply_BAIJ,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 79*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        MatLoad_SeqBAIJ,
2693                                /* 84*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /* 89*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /* 94*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                /* 99*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /*104*/ 0,
2714                                        MatRealPart_SeqBAIJ,
2715                                        MatImaginaryPart_SeqBAIJ,
2716                                        0,
2717                                        0,
2718                                /*109*/ 0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                        MatMissingDiagonal_SeqBAIJ,
2723                                /*114*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                /*119*/ 0,
2729                                        0,
2730                                        MatMultHermitianTranspose_SeqBAIJ,
2731                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2732                                        0,
2733                                /*124*/ 0,
2734                                        0,
2735                                        MatInvertBlockDiagonal_SeqBAIJ,
2736                                        0,
2737                                        0,
2738                                /*129*/ 0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                        0,
2743                                /*134*/ 0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                /*139*/ 0,
2749                                        0,
2750                                        0,
2751                                        MatFDColoringSetUp_SeqXAIJ,
2752                                        0,
2753                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2754 };
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2758 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2759 {
2760   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2761   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2766 
2767   /* allocate space for values if not already there */
2768   if (!aij->saved_values) {
2769     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2770     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2771   }
2772 
2773   /* copy values over */
2774   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2780 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2781 {
2782   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2783   PetscErrorCode ierr;
2784   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2785 
2786   PetscFunctionBegin;
2787   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2788   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2789 
2790   /* copy values over */
2791   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2796 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2800 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2801 {
2802   Mat_SeqBAIJ    *b;
2803   PetscErrorCode ierr;
2804   PetscInt       i,mbs,nbs,bs2;
2805   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2806 
2807   PetscFunctionBegin;
2808   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2809   if (nz == MAT_SKIP_ALLOCATION) {
2810     skipallocation = PETSC_TRUE;
2811     nz             = 0;
2812   }
2813 
2814   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2818 
2819   B->preallocated = PETSC_TRUE;
2820 
2821   mbs = B->rmap->n/bs;
2822   nbs = B->cmap->n/bs;
2823   bs2 = bs*bs;
2824 
2825   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2826 
2827   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2828   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2829   if (nnz) {
2830     for (i=0; i<mbs; i++) {
2831       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2832       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2833     }
2834   }
2835 
2836   b    = (Mat_SeqBAIJ*)B->data;
2837   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2838   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2839   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2840 
2841   if (!flg) {
2842     switch (bs) {
2843     case 1:
2844       B->ops->mult    = MatMult_SeqBAIJ_1;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2846       break;
2847     case 2:
2848       B->ops->mult    = MatMult_SeqBAIJ_2;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2850       break;
2851     case 3:
2852       B->ops->mult    = MatMult_SeqBAIJ_3;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2854       break;
2855     case 4:
2856       B->ops->mult    = MatMult_SeqBAIJ_4;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2858       break;
2859     case 5:
2860       B->ops->mult    = MatMult_SeqBAIJ_5;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2862       break;
2863     case 6:
2864       B->ops->mult    = MatMult_SeqBAIJ_6;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2866       break;
2867     case 7:
2868       B->ops->mult    = MatMult_SeqBAIJ_7;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2870       break;
2871     case 15:
2872       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2874       break;
2875     default:
2876       B->ops->mult    = MatMult_SeqBAIJ_N;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878       break;
2879     }
2880   }
2881   B->ops->sor = MatSOR_SeqBAIJ;
2882   b->mbs = mbs;
2883   b->nbs = nbs;
2884   if (!skipallocation) {
2885     if (!b->imax) {
2886       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2887       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2888 
2889       b->free_imax_ilen = PETSC_TRUE;
2890     }
2891     /* b->ilen will count nonzeros in each block row so far. */
2892     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2893     if (!nnz) {
2894       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2895       else if (nz < 0) nz = 1;
2896       for (i=0; i<mbs; i++) b->imax[i] = nz;
2897       nz = nz*mbs;
2898     } else {
2899       nz = 0;
2900       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2901     }
2902 
2903     /* allocate the matrix space */
2904     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2905     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2906     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2907     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2908     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2909 
2910     b->singlemalloc = PETSC_TRUE;
2911     b->i[0]         = 0;
2912     for (i=1; i<mbs+1; i++) {
2913       b->i[i] = b->i[i-1] + b->imax[i-1];
2914     }
2915     b->free_a  = PETSC_TRUE;
2916     b->free_ij = PETSC_TRUE;
2917 #if defined(PETSC_THREADCOMM_ACTIVE)
2918     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2919 #endif
2920   } else {
2921     b->free_a  = PETSC_FALSE;
2922     b->free_ij = PETSC_FALSE;
2923   }
2924 
2925   b->bs2              = bs2;
2926   b->mbs              = mbs;
2927   b->nz               = 0;
2928   b->maxnz            = nz;
2929   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2930   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2936 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2937 {
2938   PetscInt       i,m,nz,nz_max=0,*nnz;
2939   PetscScalar    *values=0;
2940   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2941   PetscErrorCode ierr;
2942 
2943   PetscFunctionBegin;
2944   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2945   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2946   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2947   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2948   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2949   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2950   m    = B->rmap->n/bs;
2951 
2952   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2953   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2954   for (i=0; i<m; i++) {
2955     nz = ii[i+1]- ii[i];
2956     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2957     nz_max = PetscMax(nz_max, nz);
2958     nnz[i] = nz;
2959   }
2960   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2961   ierr = PetscFree(nnz);CHKERRQ(ierr);
2962 
2963   values = (PetscScalar*)V;
2964   if (!values) {
2965     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2966   }
2967   for (i=0; i<m; i++) {
2968     PetscInt          ncols  = ii[i+1] - ii[i];
2969     const PetscInt    *icols = jj + ii[i];
2970     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2971     if (!roworiented) {
2972       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2973     } else {
2974       PetscInt j;
2975       for (j=0; j<ncols; j++) {
2976         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2977         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2978       }
2979     }
2980   }
2981   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2989 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2990 #if defined(PETSC_HAVE_MUMPS)
2991 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2992 #endif
2993 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2994 
2995 /*MC
2996    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2997    block sparse compressed row format.
2998 
2999    Options Database Keys:
3000 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3001 
3002   Level: beginner
3003 
3004 .seealso: MatCreateSeqBAIJ()
3005 M*/
3006 
3007 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatCreate_SeqBAIJ"
3011 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3012 {
3013   PetscErrorCode ierr;
3014   PetscMPIInt    size;
3015   Mat_SeqBAIJ    *b;
3016 
3017   PetscFunctionBegin;
3018   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3019   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3020 
3021   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3022   B->data = (void*)b;
3023   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3024 
3025   b->row          = 0;
3026   b->col          = 0;
3027   b->icol         = 0;
3028   b->reallocs     = 0;
3029   b->saved_values = 0;
3030 
3031   b->roworiented        = PETSC_TRUE;
3032   b->nonew              = 0;
3033   b->diag               = 0;
3034   b->solve_work         = 0;
3035   b->mult_work          = 0;
3036   B->spptr              = 0;
3037   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3038   b->keepnonzeropattern = PETSC_FALSE;
3039   b->xtoy               = 0;
3040   b->XtoY               = 0;
3041 
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3045 #if defined(PETSC_HAVE_MUMPS)
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3047 #endif
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3064 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3065 {
3066   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3067   PetscErrorCode ierr;
3068   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3069 
3070   PetscFunctionBegin;
3071   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3072 
3073   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3074     c->imax           = a->imax;
3075     c->ilen           = a->ilen;
3076     c->free_imax_ilen = PETSC_FALSE;
3077   } else {
3078     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3079     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3080     for (i=0; i<mbs; i++) {
3081       c->imax[i] = a->imax[i];
3082       c->ilen[i] = a->ilen[i];
3083     }
3084     c->free_imax_ilen = PETSC_TRUE;
3085   }
3086 
3087   /* allocate the matrix space */
3088   if (mallocmatspace) {
3089     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3090       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3091       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3092 
3093       c->i            = a->i;
3094       c->j            = a->j;
3095       c->singlemalloc = PETSC_FALSE;
3096       c->free_a       = PETSC_TRUE;
3097       c->free_ij      = PETSC_FALSE;
3098       c->parent       = A;
3099       C->preallocated = PETSC_TRUE;
3100       C->assembled    = PETSC_TRUE;
3101 
3102       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3103       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3104       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3105     } else {
3106       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3107       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3108 
3109       c->singlemalloc = PETSC_TRUE;
3110       c->free_a       = PETSC_TRUE;
3111       c->free_ij      = PETSC_TRUE;
3112 
3113       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3114       if (mbs > 0) {
3115         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3116         if (cpvalues == MAT_COPY_VALUES) {
3117           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3118         } else {
3119           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3120         }
3121       }
3122       C->preallocated = PETSC_TRUE;
3123       C->assembled    = PETSC_TRUE;
3124     }
3125   }
3126 
3127   c->roworiented = a->roworiented;
3128   c->nonew       = a->nonew;
3129 
3130   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3131   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3132 
3133   c->bs2         = a->bs2;
3134   c->mbs         = a->mbs;
3135   c->nbs         = a->nbs;
3136 
3137   if (a->diag) {
3138     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3139       c->diag      = a->diag;
3140       c->free_diag = PETSC_FALSE;
3141     } else {
3142       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3143       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3144       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3145       c->free_diag = PETSC_TRUE;
3146     }
3147   } else c->diag = 0;
3148 
3149   c->nz         = a->nz;
3150   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3151   c->solve_work = 0;
3152   c->mult_work  = 0;
3153 
3154   c->compressedrow.use   = a->compressedrow.use;
3155   c->compressedrow.nrows = a->compressedrow.nrows;
3156   if (a->compressedrow.use) {
3157     i    = a->compressedrow.nrows;
3158     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3159     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3160     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3161     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3162   } else {
3163     c->compressedrow.use    = PETSC_FALSE;
3164     c->compressedrow.i      = NULL;
3165     c->compressedrow.rindex = NULL;
3166   }
3167   C->nonzerostate = A->nonzerostate;
3168 
3169   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3170   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3176 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3177 {
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3182   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3183   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3184   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatLoad_SeqBAIJ"
3190 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3191 {
3192   Mat_SeqBAIJ    *a;
3193   PetscErrorCode ierr;
3194   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3195   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3196   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3197   PetscInt       *masked,nmask,tmp,bs2,ishift;
3198   PetscMPIInt    size;
3199   int            fd;
3200   PetscScalar    *aa;
3201   MPI_Comm       comm;
3202 
3203   PetscFunctionBegin;
3204   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3205   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3206   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3207   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3208   bs2  = bs*bs;
3209 
3210   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3211   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3213   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3214   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3215   M = header[1]; N = header[2]; nz = header[3];
3216 
3217   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3218   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3219 
3220   /*
3221      This code adds extra rows to make sure the number of rows is
3222     divisible by the blocksize
3223   */
3224   mbs        = M/bs;
3225   extra_rows = bs - M + bs*(mbs);
3226   if (extra_rows == bs) extra_rows = 0;
3227   else mbs++;
3228   if (extra_rows) {
3229     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3230   }
3231 
3232   /* Set global sizes if not already set */
3233   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3234     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3235   } else { /* Check if the matrix global sizes are correct */
3236     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3237     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3238       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3239     }
3240     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3241   }
3242 
3243   /* read in row lengths */
3244   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3245   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3246   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3247 
3248   /* read in column indices */
3249   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3250   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3251   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3252 
3253   /* loop over row lengths determining block row lengths */
3254   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3255   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3256   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3257   rowcount = 0;
3258   nzcount  = 0;
3259   for (i=0; i<mbs; i++) {
3260     nmask = 0;
3261     for (j=0; j<bs; j++) {
3262       kmax = rowlengths[rowcount];
3263       for (k=0; k<kmax; k++) {
3264         tmp = jj[nzcount++]/bs;
3265         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3266       }
3267       rowcount++;
3268     }
3269     browlengths[i] += nmask;
3270     /* zero out the mask elements we set */
3271     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3272   }
3273 
3274   /* Do preallocation  */
3275   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3276   a    = (Mat_SeqBAIJ*)newmat->data;
3277 
3278   /* set matrix "i" values */
3279   a->i[0] = 0;
3280   for (i=1; i<= mbs; i++) {
3281     a->i[i]      = a->i[i-1] + browlengths[i-1];
3282     a->ilen[i-1] = browlengths[i-1];
3283   }
3284   a->nz = 0;
3285   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3286 
3287   /* read in nonzero values */
3288   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3289   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3290   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3291 
3292   /* set "a" and "j" values into matrix */
3293   nzcount = 0; jcount = 0;
3294   for (i=0; i<mbs; i++) {
3295     nzcountb = nzcount;
3296     nmask    = 0;
3297     for (j=0; j<bs; j++) {
3298       kmax = rowlengths[i*bs+j];
3299       for (k=0; k<kmax; k++) {
3300         tmp = jj[nzcount++]/bs;
3301         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3302       }
3303     }
3304     /* sort the masked values */
3305     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3306 
3307     /* set "j" values into matrix */
3308     maskcount = 1;
3309     for (j=0; j<nmask; j++) {
3310       a->j[jcount++]  = masked[j];
3311       mask[masked[j]] = maskcount++;
3312     }
3313     /* set "a" values into matrix */
3314     ishift = bs2*a->i[i];
3315     for (j=0; j<bs; j++) {
3316       kmax = rowlengths[i*bs+j];
3317       for (k=0; k<kmax; k++) {
3318         tmp       = jj[nzcountb]/bs;
3319         block     = mask[tmp] - 1;
3320         point     = jj[nzcountb] - bs*tmp;
3321         idx       = ishift + bs2*block + j + bs*point;
3322         a->a[idx] = (MatScalar)aa[nzcountb++];
3323       }
3324     }
3325     /* zero out the mask elements we set */
3326     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3327   }
3328   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3329 
3330   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3331   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3332   ierr = PetscFree(aa);CHKERRQ(ierr);
3333   ierr = PetscFree(jj);CHKERRQ(ierr);
3334   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3335 
3336   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatCreateSeqBAIJ"
3343 /*@C
3344    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3345    compressed row) format.  For good matrix assembly performance the
3346    user should preallocate the matrix storage by setting the parameter nz
3347    (or the array nnz).  By setting these parameters accurately, performance
3348    during matrix assembly can be increased by more than a factor of 50.
3349 
3350    Collective on MPI_Comm
3351 
3352    Input Parameters:
3353 +  comm - MPI communicator, set to PETSC_COMM_SELF
3354 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3355           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3356 .  m - number of rows
3357 .  n - number of columns
3358 .  nz - number of nonzero blocks  per block row (same for all rows)
3359 -  nnz - array containing the number of nonzero blocks in the various block rows
3360          (possibly different for each block row) or NULL
3361 
3362    Output Parameter:
3363 .  A - the matrix
3364 
3365    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3366    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3367    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3368 
3369    Options Database Keys:
3370 .   -mat_no_unroll - uses code that does not unroll the loops in the
3371                      block calculations (much slower)
3372 .    -mat_block_size - size of the blocks to use
3373 
3374    Level: intermediate
3375 
3376    Notes:
3377    The number of rows and columns must be divisible by blocksize.
3378 
3379    If the nnz parameter is given then the nz parameter is ignored
3380 
3381    A nonzero block is any block that as 1 or more nonzeros in it
3382 
3383    The block AIJ format is fully compatible with standard Fortran 77
3384    storage.  That is, the stored row and column indices can begin at
3385    either one (as in Fortran) or zero.  See the users' manual for details.
3386 
3387    Specify the preallocated storage with either nz or nnz (not both).
3388    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3389    allocation.  See Users-Manual: ch_mat for details.
3390    matrices.
3391 
3392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3393 @*/
3394 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3395 {
3396   PetscErrorCode ierr;
3397 
3398   PetscFunctionBegin;
3399   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3400   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3401   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3402   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3403   PetscFunctionReturn(0);
3404 }
3405 
3406 #undef __FUNCT__
3407 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3408 /*@C
3409    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3410    per row in the matrix. For good matrix assembly performance the
3411    user should preallocate the matrix storage by setting the parameter nz
3412    (or the array nnz).  By setting these parameters accurately, performance
3413    during matrix assembly can be increased by more than a factor of 50.
3414 
3415    Collective on MPI_Comm
3416 
3417    Input Parameters:
3418 +  B - the matrix
3419 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3420           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3421 .  nz - number of block nonzeros per block row (same for all rows)
3422 -  nnz - array containing the number of block nonzeros in the various block rows
3423          (possibly different for each block row) or NULL
3424 
3425    Options Database Keys:
3426 .   -mat_no_unroll - uses code that does not unroll the loops in the
3427                      block calculations (much slower)
3428 .    -mat_block_size - size of the blocks to use
3429 
3430    Level: intermediate
3431 
3432    Notes:
3433    If the nnz parameter is given then the nz parameter is ignored
3434 
3435    You can call MatGetInfo() to get information on how effective the preallocation was;
3436    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3437    You can also run with the option -info and look for messages with the string
3438    malloc in them to see if additional memory allocation was needed.
3439 
3440    The block AIJ format is fully compatible with standard Fortran 77
3441    storage.  That is, the stored row and column indices can begin at
3442    either one (as in Fortran) or zero.  See the users' manual for details.
3443 
3444    Specify the preallocated storage with either nz or nnz (not both).
3445    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3446    allocation.  See Users-Manual: ch_mat for details.
3447 
3448 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3449 @*/
3450 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3451 {
3452   PetscErrorCode ierr;
3453 
3454   PetscFunctionBegin;
3455   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3456   PetscValidType(B,1);
3457   PetscValidLogicalCollectiveInt(B,bs,2);
3458   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3459   PetscFunctionReturn(0);
3460 }
3461 
3462 #undef __FUNCT__
3463 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3464 /*@C
3465    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3466    (the default sequential PETSc format).
3467 
3468    Collective on MPI_Comm
3469 
3470    Input Parameters:
3471 +  B - the matrix
3472 .  i - the indices into j for the start of each local row (starts with zero)
3473 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3474 -  v - optional values in the matrix
3475 
3476    Level: developer
3477 
3478    Notes:
3479    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3480    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3481    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3482    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3483    block column and the second index is over columns within a block.
3484 
3485 .keywords: matrix, aij, compressed row, sparse
3486 
3487 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3488 @*/
3489 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3490 {
3491   PetscErrorCode ierr;
3492 
3493   PetscFunctionBegin;
3494   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3495   PetscValidType(B,1);
3496   PetscValidLogicalCollectiveInt(B,bs,2);
3497   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 
3502 #undef __FUNCT__
3503 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3504 /*@
3505      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3506 
3507      Collective on MPI_Comm
3508 
3509    Input Parameters:
3510 +  comm - must be an MPI communicator of size 1
3511 .  bs - size of block
3512 .  m - number of rows
3513 .  n - number of columns
3514 .  i - row indices
3515 .  j - column indices
3516 -  a - matrix values
3517 
3518    Output Parameter:
3519 .  mat - the matrix
3520 
3521    Level: advanced
3522 
3523    Notes:
3524        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3525     once the matrix is destroyed
3526 
3527        You cannot set new nonzero locations into this matrix, that will generate an error.
3528 
3529        The i and j indices are 0 based
3530 
3531        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3532 
3533       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3534       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3535       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3536       with column-major ordering within blocks.
3537 
3538 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3539 
3540 @*/
3541 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544   PetscInt       ii;
3545   Mat_SeqBAIJ    *baij;
3546 
3547   PetscFunctionBegin;
3548   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3549   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3550 
3551   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3552   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3553   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3554   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3555   baij = (Mat_SeqBAIJ*)(*mat)->data;
3556   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3557   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3558 
3559   baij->i = i;
3560   baij->j = j;
3561   baij->a = a;
3562 
3563   baij->singlemalloc = PETSC_FALSE;
3564   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3565   baij->free_a       = PETSC_FALSE;
3566   baij->free_ij      = PETSC_FALSE;
3567 
3568   for (ii=0; ii<m; ii++) {
3569     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3570 #if defined(PETSC_USE_DEBUG)
3571     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3572 #endif
3573   }
3574 #if defined(PETSC_USE_DEBUG)
3575   for (ii=0; ii<baij->i[m]; ii++) {
3576     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3577     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3578   }
3579 #endif
3580 
3581   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583   PetscFunctionReturn(0);
3584 }
3585 
3586 #undef __FUNCT__
3587 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3588 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3589 {
3590   PetscErrorCode ierr;
3591 
3592   PetscFunctionBegin;
3593   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3594   PetscFunctionReturn(0);
3595 }
3596