xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a593226baf5c94d3e4393e03cae3fa59cb94e469)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*ii = a->i,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !ii) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (diag[i] >= ii[i+1]) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1211   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1212 
1213   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1214   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1215   ierr = PetscFree(A->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260   case MAT_USE_HASH_TABLE:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283   MatScalar      *aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   bs2 = bs*bs;
1289   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290 
1291   bn  = row/bs;   /* Block number */
1292   bp  = row % bs; /* Block Position */
1293   M   = ai[bn+1] - ai[bn];
1294   *nz = bs*M;
1295 
1296   if (v) {
1297     *v = 0;
1298     if (*nz) {
1299       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1300       for (i=0; i<M; i++) { /* for each block in the block row */
1301         v_i  = *v + i*bs;
1302         aa_i = aa + bs2*(ai[bn] + i);
1303         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304       }
1305     }
1306   }
1307 
1308   if (idx) {
1309     *idx = 0;
1310     if (*nz) {
1311       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1312       for (i=0; i<M; i++) { /* for each block in the block row */
1313         idx_i = *idx + i*bs;
1314         itmp  = bs*aj[ai[bn] + i];
1315         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1336 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1337 {
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1342   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1350 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1351 {
1352   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1353   Mat            C;
1354   PetscErrorCode ierr;
1355   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1356   PetscInt       *rows,*cols,bs2=a->bs2;
1357   MatScalar      *array;
1358 
1359   PetscFunctionBegin;
1360   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1361   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1362     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1363 
1364     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1365     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1366     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1367     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1368     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1369     ierr = PetscFree(col);CHKERRQ(ierr);
1370   } else {
1371     C = *B;
1372   }
1373 
1374   array = a->a;
1375   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1376   for (i=0; i<mbs; i++) {
1377     cols[0] = i*bs;
1378     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1379     len = ai[i+1] - ai[i];
1380     for (j=0; j<len; j++) {
1381       rows[0] = (*aj++)*bs;
1382       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1383       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1384       array += bs2;
1385     }
1386   }
1387   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1388 
1389   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391 
1392   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1393     *B = C;
1394   } else {
1395     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1403 {
1404   PetscErrorCode ierr;
1405   Mat            Btrans;
1406 
1407   PetscFunctionBegin;
1408   *f   = PETSC_FALSE;
1409   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1410   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1411   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1417 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1418 {
1419   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1422   int            fd;
1423   PetscScalar    *aa;
1424   FILE           *file;
1425 
1426   PetscFunctionBegin;
1427   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1428   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1429   col_lens[0] = MAT_FILE_CLASSID;
1430 
1431   col_lens[1] = A->rmap->N;
1432   col_lens[2] = A->cmap->n;
1433   col_lens[3] = a->nz*bs2;
1434 
1435   /* store lengths of each row and write (including header) to file */
1436   count = 0;
1437   for (i=0; i<a->mbs; i++) {
1438     for (j=0; j<bs; j++) {
1439       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1440     }
1441   }
1442   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1443   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1444 
1445   /* store column indices (zero start index) */
1446   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1447   count = 0;
1448   for (i=0; i<a->mbs; i++) {
1449     for (j=0; j<bs; j++) {
1450       for (k=a->i[i]; k<a->i[i+1]; k++) {
1451         for (l=0; l<bs; l++) {
1452           jj[count++] = bs*a->j[k] + l;
1453         }
1454       }
1455     }
1456   }
1457   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1458   ierr = PetscFree(jj);CHKERRQ(ierr);
1459 
1460   /* store nonzero values */
1461   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1462   count = 0;
1463   for (i=0; i<a->mbs; i++) {
1464     for (j=0; j<bs; j++) {
1465       for (k=a->i[i]; k<a->i[i+1]; k++) {
1466         for (l=0; l<bs; l++) {
1467           aa[count++] = a->a[bs2*k + l*bs + j];
1468         }
1469       }
1470     }
1471   }
1472   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1473   ierr = PetscFree(aa);CHKERRQ(ierr);
1474 
1475   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1476   if (file) {
1477     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1484 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1485 {
1486   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1487   PetscErrorCode    ierr;
1488   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1489   PetscViewerFormat format;
1490 
1491   PetscFunctionBegin;
1492   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1493   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1494     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1495   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1496     Mat aij;
1497     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1498     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1499     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1500   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1501       PetscFunctionReturn(0);
1502   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1503     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1504     for (i=0; i<a->mbs; i++) {
1505       for (j=0; j<bs; j++) {
1506         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1507         for (k=a->i[i]; k<a->i[i+1]; k++) {
1508           for (l=0; l<bs; l++) {
1509 #if defined(PETSC_USE_COMPLEX)
1510             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1512                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1513             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1515                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1516             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1518             }
1519 #else
1520             if (a->a[bs2*k + l*bs + j] != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1522             }
1523 #endif
1524           }
1525         }
1526         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1527       }
1528     }
1529     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1530   } else {
1531     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1532     for (i=0; i<a->mbs; i++) {
1533       for (j=0; j<bs; j++) {
1534         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1535         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536           for (l=0; l<bs; l++) {
1537 #if defined(PETSC_USE_COMPLEX)
1538             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             }
1547 #else
1548             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1549 #endif
1550           }
1551         }
1552         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1553       }
1554     }
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1556   }
1557   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #include <petscdraw.h>
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1564 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1565 {
1566   Mat               A = (Mat) Aa;
1567   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1568   PetscErrorCode    ierr;
1569   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1570   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1571   MatScalar         *aa;
1572   PetscViewer       viewer;
1573   PetscViewerFormat format;
1574 
1575   PetscFunctionBegin;
1576   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1577   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1578 
1579   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1580 
1581   /* loop over matrix elements drawing boxes */
1582 
1583   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1584     color = PETSC_DRAW_BLUE;
1585     for (i=0,row=0; i<mbs; i++,row+=bs) {
1586       for (j=a->i[i]; j<a->i[i+1]; j++) {
1587         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1588         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1589         aa  = a->a + j*bs2;
1590         for (k=0; k<bs; k++) {
1591           for (l=0; l<bs; l++) {
1592             if (PetscRealPart(*aa++) >=  0.) continue;
1593             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1594           }
1595         }
1596       }
1597     }
1598     color = PETSC_DRAW_CYAN;
1599     for (i=0,row=0; i<mbs; i++,row+=bs) {
1600       for (j=a->i[i]; j<a->i[i+1]; j++) {
1601         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1602         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1603         aa  = a->a + j*bs2;
1604         for (k=0; k<bs; k++) {
1605           for (l=0; l<bs; l++) {
1606             if (PetscRealPart(*aa++) != 0.) continue;
1607             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1608           }
1609         }
1610       }
1611     }
1612     color = PETSC_DRAW_RED;
1613     for (i=0,row=0; i<mbs; i++,row+=bs) {
1614       for (j=a->i[i]; j<a->i[i+1]; j++) {
1615         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1616         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1617         aa  = a->a + j*bs2;
1618         for (k=0; k<bs; k++) {
1619           for (l=0; l<bs; l++) {
1620             if (PetscRealPart(*aa++) <= 0.) continue;
1621             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1622           }
1623         }
1624       }
1625     }
1626   } else {
1627     /* use contour shading to indicate magnitude of values */
1628     /* first determine max of all nonzero values */
1629     PetscDraw popup;
1630     PetscReal scale,maxv = 0.0;
1631 
1632     for (i=0; i<a->nz*a->bs2; i++) {
1633       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1634     }
1635     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1636     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1637     if (popup) {
1638       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1639     }
1640     for (i=0,row=0; i<mbs; i++,row+=bs) {
1641       for (j=a->i[i]; j<a->i[i+1]; j++) {
1642         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644         aa  = a->a + j*bs2;
1645         for (k=0; k<bs; k++) {
1646           for (l=0; l<bs; l++) {
1647             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1648             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649           }
1650         }
1651       }
1652     }
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1660 {
1661   PetscErrorCode ierr;
1662   PetscReal      xl,yl,xr,yr,w,h;
1663   PetscDraw      draw;
1664   PetscBool      isnull;
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1668   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1669 
1670   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1671   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1672   xr  += w;    yr += h;  xl = -w;     yl = -h;
1673   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1674   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1675   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatView_SeqBAIJ"
1681 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1682 {
1683   PetscErrorCode ierr;
1684   PetscBool      iascii,isbinary,isdraw;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1689   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1690   if (iascii) {
1691     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1692   } else if (isbinary) {
1693     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1694   } else if (isdraw) {
1695     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1696   } else {
1697     Mat B;
1698     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1699     ierr = MatView(B,viewer);CHKERRQ(ierr);
1700     ierr = MatDestroy(&B);CHKERRQ(ierr);
1701   }
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1708 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1709 {
1710   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1711   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1712   PetscInt    *ai = a->i,*ailen = a->ilen;
1713   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1714   MatScalar   *ap,*aa = a->a;
1715 
1716   PetscFunctionBegin;
1717   for (k=0; k<m; k++) { /* loop over rows */
1718     row = im[k]; brow = row/bs;
1719     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1720     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1721     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1722     nrow = ailen[brow];
1723     for (l=0; l<n; l++) { /* loop over columns */
1724       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1725       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1726       col  = in[l];
1727       bcol = col/bs;
1728       cidx = col%bs;
1729       ridx = row%bs;
1730       high = nrow;
1731       low  = 0; /* assume unsorted */
1732       while (high-low > 5) {
1733         t = (low+high)/2;
1734         if (rp[t] > bcol) high = t;
1735         else             low  = t;
1736       }
1737       for (i=low; i<high; i++) {
1738         if (rp[i] > bcol) break;
1739         if (rp[i] == bcol) {
1740           *v++ = ap[bs2*i+bs*cidx+ridx];
1741           goto finished;
1742         }
1743       }
1744       *v++ = 0.0;
1745 finished:;
1746     }
1747   }
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1753 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1754 {
1755   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1756   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1757   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1758   PetscErrorCode    ierr;
1759   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1760   PetscBool         roworiented=a->roworiented;
1761   const PetscScalar *value     = v;
1762   MatScalar         *ap,*aa = a->a,*bap;
1763 
1764   PetscFunctionBegin;
1765   if (roworiented) {
1766     stepval = (n-1)*bs;
1767   } else {
1768     stepval = (m-1)*bs;
1769   }
1770   for (k=0; k<m; k++) { /* loop over added rows */
1771     row = im[k];
1772     if (row < 0) continue;
1773 #if defined(PETSC_USE_DEBUG)
1774     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1775 #endif
1776     rp   = aj + ai[row];
1777     ap   = aa + bs2*ai[row];
1778     rmax = imax[row];
1779     nrow = ailen[row];
1780     low  = 0;
1781     high = nrow;
1782     for (l=0; l<n; l++) { /* loop over added columns */
1783       if (in[l] < 0) continue;
1784 #if defined(PETSC_USE_DEBUG)
1785       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1786 #endif
1787       col = in[l];
1788       if (roworiented) {
1789         value = v + (k*(stepval+bs) + l)*bs;
1790       } else {
1791         value = v + (l*(stepval+bs) + k)*bs;
1792       }
1793       if (col <= lastcol) low = 0;
1794       else high = nrow;
1795       lastcol = col;
1796       while (high-low > 7) {
1797         t = (low+high)/2;
1798         if (rp[t] > col) high = t;
1799         else             low  = t;
1800       }
1801       for (i=low; i<high; i++) {
1802         if (rp[i] > col) break;
1803         if (rp[i] == col) {
1804           bap = ap +  bs2*i;
1805           if (roworiented) {
1806             if (is == ADD_VALUES) {
1807               for (ii=0; ii<bs; ii++,value+=stepval) {
1808                 for (jj=ii; jj<bs2; jj+=bs) {
1809                   bap[jj] += *value++;
1810                 }
1811               }
1812             } else {
1813               for (ii=0; ii<bs; ii++,value+=stepval) {
1814                 for (jj=ii; jj<bs2; jj+=bs) {
1815                   bap[jj] = *value++;
1816                 }
1817               }
1818             }
1819           } else {
1820             if (is == ADD_VALUES) {
1821               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1822                 for (jj=0; jj<bs; jj++) {
1823                   bap[jj] += value[jj];
1824                 }
1825                 bap += bs;
1826               }
1827             } else {
1828               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829                 for (jj=0; jj<bs; jj++) {
1830                   bap[jj]  = value[jj];
1831                 }
1832                 bap += bs;
1833               }
1834             }
1835           }
1836           goto noinsert2;
1837         }
1838       }
1839       if (nonew == 1) goto noinsert2;
1840       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1841       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1842       N = nrow++ - 1; high++;
1843       /* shift up all the later entries in this row */
1844       for (ii=N; ii>=i; ii--) {
1845         rp[ii+1] = rp[ii];
1846         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1847       }
1848       if (N >= i) {
1849         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1850       }
1851       rp[i] = col;
1852       bap   = ap +  bs2*i;
1853       if (roworiented) {
1854         for (ii=0; ii<bs; ii++,value+=stepval) {
1855           for (jj=ii; jj<bs2; jj+=bs) {
1856             bap[jj] = *value++;
1857           }
1858         }
1859       } else {
1860         for (ii=0; ii<bs; ii++,value+=stepval) {
1861           for (jj=0; jj<bs; jj++) {
1862             *bap++ = *value++;
1863           }
1864         }
1865       }
1866 noinsert2:;
1867       low = i;
1868     }
1869     ailen[row] = nrow;
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1876 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1877 {
1878   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1879   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1880   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1881   PetscErrorCode ierr;
1882   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1883   MatScalar      *aa  = a->a,*ap;
1884   PetscReal      ratio=0.6;
1885 
1886   PetscFunctionBegin;
1887   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1888 
1889   if (m) rmax = ailen[0];
1890   for (i=1; i<mbs; i++) {
1891     /* move each row back by the amount of empty slots (fshift) before it*/
1892     fshift += imax[i-1] - ailen[i-1];
1893     rmax    = PetscMax(rmax,ailen[i]);
1894     if (fshift) {
1895       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1896       N  = ailen[i];
1897       for (j=0; j<N; j++) {
1898         ip[j-fshift] = ip[j];
1899 
1900         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1901       }
1902     }
1903     ai[i] = ai[i-1] + ailen[i-1];
1904   }
1905   if (mbs) {
1906     fshift += imax[mbs-1] - ailen[mbs-1];
1907     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1908   }
1909 
1910   /* reset ilen and imax for each row */
1911   a->nonzerorowcnt = 0;
1912   for (i=0; i<mbs; i++) {
1913     ailen[i] = imax[i] = ai[i+1] - ai[i];
1914     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1915   }
1916   a->nz = ai[mbs];
1917 
1918   /* diagonals may have moved, so kill the diagonal pointers */
1919   a->idiagvalid = PETSC_FALSE;
1920   if (fshift && a->diag) {
1921     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1922     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1923     a->diag = 0;
1924   }
1925   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1926   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1927   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1928   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1929 
1930   A->info.mallocs    += a->reallocs;
1931   a->reallocs         = 0;
1932   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1933   a->rmax             = rmax;
1934 
1935   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 /*
1940    This function returns an array of flags which indicate the locations of contiguous
1941    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1942    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1943    Assume: sizes should be long enough to hold all the values.
1944 */
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1947 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1948 {
1949   PetscInt  i,j,k,row;
1950   PetscBool flg;
1951 
1952   PetscFunctionBegin;
1953   for (i=0,j=0; i<n; j++) {
1954     row = idx[i];
1955     if (row%bs!=0) { /* Not the begining of a block */
1956       sizes[j] = 1;
1957       i++;
1958     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1959       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1960       i++;
1961     } else { /* Begining of the block, so check if the complete block exists */
1962       flg = PETSC_TRUE;
1963       for (k=1; k<bs; k++) {
1964         if (row+k != idx[i+k]) { /* break in the block */
1965           flg = PETSC_FALSE;
1966           break;
1967         }
1968       }
1969       if (flg) { /* No break in the bs */
1970         sizes[j] = bs;
1971         i       += bs;
1972       } else {
1973         sizes[j] = 1;
1974         i++;
1975       }
1976     }
1977   }
1978   *bs_max = j;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1984 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1985 {
1986   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1987   PetscErrorCode    ierr;
1988   PetscInt          i,j,k,count,*rows;
1989   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1990   PetscScalar       zero = 0.0;
1991   MatScalar         *aa;
1992   const PetscScalar *xx;
1993   PetscScalar       *bb;
1994 
1995   PetscFunctionBegin;
1996   /* fix right hand side if needed */
1997   if (x && b) {
1998     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1999     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2000     for (i=0; i<is_n; i++) {
2001       bb[is_idx[i]] = diag*xx[is_idx[i]];
2002     }
2003     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2004     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2005   }
2006 
2007   /* Make a copy of the IS and  sort it */
2008   /* allocate memory for rows,sizes */
2009   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2010 
2011   /* copy IS values to rows, and sort them */
2012   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2013   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2014 
2015   if (baij->keepnonzeropattern) {
2016     for (i=0; i<is_n; i++) sizes[i] = 1;
2017     bs_max          = is_n;
2018   } else {
2019     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2020     A->nonzerostate++;
2021   }
2022 
2023   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2024     row = rows[j];
2025     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2026     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2027     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2028     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2029       if (diag != (PetscScalar)0.0) {
2030         if (baij->ilen[row/bs] > 0) {
2031           baij->ilen[row/bs]       = 1;
2032           baij->j[baij->i[row/bs]] = row/bs;
2033 
2034           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2035         }
2036         /* Now insert all the diagonal values for this bs */
2037         for (k=0; k<bs; k++) {
2038           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2039         }
2040       } else { /* (diag == 0.0) */
2041         baij->ilen[row/bs] = 0;
2042       } /* end (diag == 0.0) */
2043     } else { /* (sizes[i] != bs) */
2044 #if defined(PETSC_USE_DEBUG)
2045       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2046 #endif
2047       for (k=0; k<count; k++) {
2048         aa[0] =  zero;
2049         aa   += bs;
2050       }
2051       if (diag != (PetscScalar)0.0) {
2052         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2053       }
2054     }
2055   }
2056 
2057   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2058   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2064 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2065 {
2066   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2067   PetscErrorCode    ierr;
2068   PetscInt          i,j,k,count;
2069   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2070   PetscScalar       zero = 0.0;
2071   MatScalar         *aa;
2072   const PetscScalar *xx;
2073   PetscScalar       *bb;
2074   PetscBool         *zeroed,vecs = PETSC_FALSE;
2075 
2076   PetscFunctionBegin;
2077   /* fix right hand side if needed */
2078   if (x && b) {
2079     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2080     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2081     vecs = PETSC_TRUE;
2082   }
2083 
2084   /* zero the columns */
2085   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2086   for (i=0; i<is_n; i++) {
2087     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2088     zeroed[is_idx[i]] = PETSC_TRUE;
2089   }
2090   for (i=0; i<A->rmap->N; i++) {
2091     if (!zeroed[i]) {
2092       row = i/bs;
2093       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2094         for (k=0; k<bs; k++) {
2095           col = bs*baij->j[j] + k;
2096           if (zeroed[col]) {
2097             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2098             if (vecs) bb[i] -= aa[0]*xx[col];
2099             aa[0] = 0.0;
2100           }
2101         }
2102       }
2103     } else if (vecs) bb[i] = diag*xx[i];
2104   }
2105   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2106   if (vecs) {
2107     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2108     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2109   }
2110 
2111   /* zero the rows */
2112   for (i=0; i<is_n; i++) {
2113     row   = is_idx[i];
2114     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2115     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2116     for (k=0; k<count; k++) {
2117       aa[0] =  zero;
2118       aa   += bs;
2119     }
2120     if (diag != (PetscScalar)0.0) {
2121       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2122     }
2123   }
2124   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2130 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2131 {
2132   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2133   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2134   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2135   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2136   PetscErrorCode ierr;
2137   PetscInt       ridx,cidx,bs2=a->bs2;
2138   PetscBool      roworiented=a->roworiented;
2139   MatScalar      *ap,value,*aa=a->a,*bap;
2140 
2141   PetscFunctionBegin;
2142   for (k=0; k<m; k++) { /* loop over added rows */
2143     row  = im[k];
2144     brow = row/bs;
2145     if (row < 0) continue;
2146 #if defined(PETSC_USE_DEBUG)
2147     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2148 #endif
2149     rp   = aj + ai[brow];
2150     ap   = aa + bs2*ai[brow];
2151     rmax = imax[brow];
2152     nrow = ailen[brow];
2153     low  = 0;
2154     high = nrow;
2155     for (l=0; l<n; l++) { /* loop over added columns */
2156       if (in[l] < 0) continue;
2157 #if defined(PETSC_USE_DEBUG)
2158       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2159 #endif
2160       col  = in[l]; bcol = col/bs;
2161       ridx = row % bs; cidx = col % bs;
2162       if (roworiented) {
2163         value = v[l + k*n];
2164       } else {
2165         value = v[k + l*m];
2166       }
2167       if (col <= lastcol) low = 0; else high = nrow;
2168       lastcol = col;
2169       while (high-low > 7) {
2170         t = (low+high)/2;
2171         if (rp[t] > bcol) high = t;
2172         else              low  = t;
2173       }
2174       for (i=low; i<high; i++) {
2175         if (rp[i] > bcol) break;
2176         if (rp[i] == bcol) {
2177           bap = ap +  bs2*i + bs*cidx + ridx;
2178           if (is == ADD_VALUES) *bap += value;
2179           else                  *bap  = value;
2180           goto noinsert1;
2181         }
2182       }
2183       if (nonew == 1) goto noinsert1;
2184       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2185       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2186       N = nrow++ - 1; high++;
2187       /* shift up all the later entries in this row */
2188       for (ii=N; ii>=i; ii--) {
2189         rp[ii+1] = rp[ii];
2190         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2191       }
2192       if (N>=i) {
2193         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2194       }
2195       rp[i]                      = bcol;
2196       ap[bs2*i + bs*cidx + ridx] = value;
2197       a->nz++;
2198       A->nonzerostate++;
2199 noinsert1:;
2200       low = i;
2201     }
2202     ailen[brow] = nrow;
2203   }
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2209 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2210 {
2211   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2212   Mat            outA;
2213   PetscErrorCode ierr;
2214   PetscBool      row_identity,col_identity;
2215 
2216   PetscFunctionBegin;
2217   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2218   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2219   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2220   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2221 
2222   outA            = inA;
2223   inA->factortype = MAT_FACTOR_LU;
2224 
2225   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2226 
2227   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2228   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2229   a->row = row;
2230   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2231   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2232   a->col = col;
2233 
2234   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2235   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2236   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2237   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2238 
2239   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2240   if (!a->solve_work) {
2241     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2242     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2243   }
2244   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2250 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2251 {
2252   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2253   PetscInt    i,nz,mbs;
2254 
2255   PetscFunctionBegin;
2256   nz  = baij->maxnz;
2257   mbs = baij->mbs;
2258   for (i=0; i<nz; i++) {
2259     baij->j[i] = indices[i];
2260   }
2261   baij->nz = nz;
2262   for (i=0; i<mbs; i++) {
2263     baij->ilen[i] = baij->imax[i];
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2270 /*@
2271     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2272        in the matrix.
2273 
2274   Input Parameters:
2275 +  mat - the SeqBAIJ matrix
2276 -  indices - the column indices
2277 
2278   Level: advanced
2279 
2280   Notes:
2281     This can be called if you have precomputed the nonzero structure of the
2282   matrix and want to provide it to the matrix object to improve the performance
2283   of the MatSetValues() operation.
2284 
2285     You MUST have set the correct numbers of nonzeros per row in the call to
2286   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2287 
2288     MUST be called before any calls to MatSetValues();
2289 
2290 @*/
2291 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2292 {
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2297   PetscValidPointer(indices,2);
2298   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2304 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2305 {
2306   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2309   PetscReal      atmp;
2310   PetscScalar    *x,zero = 0.0;
2311   MatScalar      *aa;
2312   PetscInt       ncols,brow,krow,kcol;
2313 
2314   PetscFunctionBegin;
2315   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2316   bs  = A->rmap->bs;
2317   aa  = a->a;
2318   ai  = a->i;
2319   aj  = a->j;
2320   mbs = a->mbs;
2321 
2322   ierr = VecSet(v,zero);CHKERRQ(ierr);
2323   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2324   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2325   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2326   for (i=0; i<mbs; i++) {
2327     ncols = ai[1] - ai[0]; ai++;
2328     brow  = bs*i;
2329     for (j=0; j<ncols; j++) {
2330       for (kcol=0; kcol<bs; kcol++) {
2331         for (krow=0; krow<bs; krow++) {
2332           atmp = PetscAbsScalar(*aa);aa++;
2333           row  = brow + krow;   /* row index */
2334           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2335         }
2336       }
2337       aj++;
2338     }
2339   }
2340   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatCopy_SeqBAIJ"
2346 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2347 {
2348   PetscErrorCode ierr;
2349 
2350   PetscFunctionBegin;
2351   /* If the two matrices have the same copy implementation, use fast copy. */
2352   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2353     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2354     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2355     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2356 
2357     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2358     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2359     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2360   } else {
2361     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2362   }
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2368 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2369 {
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2379 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2380 {
2381   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2382 
2383   PetscFunctionBegin;
2384   *array = a->a;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2390 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2391 {
2392   PetscFunctionBegin;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2398 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2399 {
2400   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2401   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2402   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   /* Set the number of nonzeros in the new matrix */
2407   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2413 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2414 {
2415   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2416   PetscErrorCode ierr;
2417   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2418   PetscBLASInt   one=1;
2419 
2420   PetscFunctionBegin;
2421   if (str == SAME_NONZERO_PATTERN) {
2422     PetscScalar  alpha = a;
2423     PetscBLASInt bnz;
2424     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2425     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2426     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2427   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2428     if (y->xtoy && y->XtoY != X) {
2429       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2430       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2431     }
2432     if (!y->xtoy) { /* get xtoy */
2433       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2434       y->XtoY = X;
2435       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2436     }
2437     for (i=0; i<x->nz; i++) {
2438       j = 0;
2439       while (j < bs2) {
2440         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2441         j++;
2442       }
2443     }
2444     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2445     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2446   } else {
2447     Mat      B;
2448     PetscInt *nnz;
2449     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2450     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2451     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2452     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2453     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2454     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2455     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2456     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2457     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2458     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2459     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2460     ierr = PetscFree(nnz);CHKERRQ(ierr);
2461   }
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2467 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2468 {
2469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2471   MatScalar   *aa = a->a;
2472 
2473   PetscFunctionBegin;
2474   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2480 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2481 {
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2483   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2484   MatScalar   *aa = a->a;
2485 
2486   PetscFunctionBegin;
2487   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2493 /*
2494     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2495 */
2496 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2497 {
2498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2499   PetscErrorCode ierr;
2500   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2501   PetscInt       nz = a->i[m],row,*jj,mr,col;
2502 
2503   PetscFunctionBegin;
2504   *nn = n;
2505   if (!ia) PetscFunctionReturn(0);
2506   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2507   else {
2508     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2509     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2510     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2511     jj   = a->j;
2512     for (i=0; i<nz; i++) {
2513       collengths[jj[i]]++;
2514     }
2515     cia[0] = oshift;
2516     for (i=0; i<n; i++) {
2517       cia[i+1] = cia[i] + collengths[i];
2518     }
2519     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2520     jj   = a->j;
2521     for (row=0; row<m; row++) {
2522       mr = a->i[row+1] - a->i[row];
2523       for (i=0; i<mr; i++) {
2524         col = *jj++;
2525 
2526         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2527       }
2528     }
2529     ierr = PetscFree(collengths);CHKERRQ(ierr);
2530     *ia  = cia; *ja = cja;
2531   }
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2538 {
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   if (!ia) PetscFunctionReturn(0);
2543   ierr = PetscFree(*ia);CHKERRQ(ierr);
2544   ierr = PetscFree(*ja);CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*
2549  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2550  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2551  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2552  */
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2555 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2556 {
2557   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2558   PetscErrorCode ierr;
2559   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2560   PetscInt       nz = a->i[m],row,*jj,mr,col;
2561   PetscInt       *cspidx;
2562 
2563   PetscFunctionBegin;
2564   *nn = n;
2565   if (!ia) PetscFunctionReturn(0);
2566 
2567   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2568   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2569   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2570   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2571   jj   = a->j;
2572   for (i=0; i<nz; i++) {
2573     collengths[jj[i]]++;
2574   }
2575   cia[0] = oshift;
2576   for (i=0; i<n; i++) {
2577     cia[i+1] = cia[i] + collengths[i];
2578   }
2579   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2580   jj   = a->j;
2581   for (row=0; row<m; row++) {
2582     mr = a->i[row+1] - a->i[row];
2583     for (i=0; i<mr; i++) {
2584       col = *jj++;
2585       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2586       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2587     }
2588   }
2589   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2590   *ia    = cia; *ja = cja;
2591   *spidx = cspidx;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2597 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2598 {
2599   PetscErrorCode ierr;
2600 
2601   PetscFunctionBegin;
2602   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2603   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 /* -------------------------------------------------------------------*/
2608 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2609                                        MatGetRow_SeqBAIJ,
2610                                        MatRestoreRow_SeqBAIJ,
2611                                        MatMult_SeqBAIJ_N,
2612                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2613                                        MatMultTranspose_SeqBAIJ,
2614                                        MatMultTransposeAdd_SeqBAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                /* 10*/ 0,
2619                                        MatLUFactor_SeqBAIJ,
2620                                        0,
2621                                        0,
2622                                        MatTranspose_SeqBAIJ,
2623                                /* 15*/ MatGetInfo_SeqBAIJ,
2624                                        MatEqual_SeqBAIJ,
2625                                        MatGetDiagonal_SeqBAIJ,
2626                                        MatDiagonalScale_SeqBAIJ,
2627                                        MatNorm_SeqBAIJ,
2628                                /* 20*/ 0,
2629                                        MatAssemblyEnd_SeqBAIJ,
2630                                        MatSetOption_SeqBAIJ,
2631                                        MatZeroEntries_SeqBAIJ,
2632                                /* 24*/ MatZeroRows_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                /* 29*/ MatSetUp_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                /* 34*/ MatDuplicate_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        MatILUFactor_SeqBAIJ,
2646                                        0,
2647                                /* 39*/ MatAXPY_SeqBAIJ,
2648                                        MatGetSubMatrices_SeqBAIJ,
2649                                        MatIncreaseOverlap_SeqBAIJ,
2650                                        MatGetValues_SeqBAIJ,
2651                                        MatCopy_SeqBAIJ,
2652                                /* 44*/ 0,
2653                                        MatScale_SeqBAIJ,
2654                                        0,
2655                                        0,
2656                                        MatZeroRowsColumns_SeqBAIJ,
2657                                /* 49*/ 0,
2658                                        MatGetRowIJ_SeqBAIJ,
2659                                        MatRestoreRowIJ_SeqBAIJ,
2660                                        MatGetColumnIJ_SeqBAIJ,
2661                                        MatRestoreColumnIJ_SeqBAIJ,
2662                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        MatSetValuesBlocked_SeqBAIJ,
2667                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2668                                        MatDestroy_SeqBAIJ,
2669                                        MatView_SeqBAIJ,
2670                                        0,
2671                                        0,
2672                                /* 64*/ 0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2678                                        0,
2679                                        MatConvert_Basic,
2680                                        0,
2681                                        0,
2682                                /* 74*/ 0,
2683                                        MatFDColoringApply_BAIJ,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 79*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        MatLoad_SeqBAIJ,
2692                                /* 84*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 89*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 94*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 99*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /*104*/ 0,
2713                                        MatRealPart_SeqBAIJ,
2714                                        MatImaginaryPart_SeqBAIJ,
2715                                        0,
2716                                        0,
2717                                /*109*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        MatMissingDiagonal_SeqBAIJ,
2722                                /*114*/ 0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                /*119*/ 0,
2728                                        0,
2729                                        MatMultHermitianTranspose_SeqBAIJ,
2730                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2731                                        0,
2732                                /*124*/ 0,
2733                                        0,
2734                                        MatInvertBlockDiagonal_SeqBAIJ,
2735                                        0,
2736                                        0,
2737                                /*129*/ 0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                /*134*/ 0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                /*139*/ 0,
2748                                        0,
2749                                        0,
2750                                        MatFDColoringSetUp_SeqXAIJ,
2751                                        0,
2752                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2753 };
2754 
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2757 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2758 {
2759   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2760   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2765 
2766   /* allocate space for values if not already there */
2767   if (!aij->saved_values) {
2768     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2769     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2770   }
2771 
2772   /* copy values over */
2773   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2779 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2780 {
2781   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2782   PetscErrorCode ierr;
2783   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2784 
2785   PetscFunctionBegin;
2786   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2787   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2788 
2789   /* copy values over */
2790   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2795 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2799 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2800 {
2801   Mat_SeqBAIJ    *b;
2802   PetscErrorCode ierr;
2803   PetscInt       i,mbs,nbs,bs2;
2804   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2805 
2806   PetscFunctionBegin;
2807   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2808   if (nz == MAT_SKIP_ALLOCATION) {
2809     skipallocation = PETSC_TRUE;
2810     nz             = 0;
2811   }
2812 
2813   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2817 
2818   B->preallocated = PETSC_TRUE;
2819 
2820   mbs = B->rmap->n/bs;
2821   nbs = B->cmap->n/bs;
2822   bs2 = bs*bs;
2823 
2824   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2825 
2826   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2827   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2828   if (nnz) {
2829     for (i=0; i<mbs; i++) {
2830       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2831       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2832     }
2833   }
2834 
2835   b    = (Mat_SeqBAIJ*)B->data;
2836   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2837   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2838   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2839 
2840   if (!flg) {
2841     switch (bs) {
2842     case 1:
2843       B->ops->mult    = MatMult_SeqBAIJ_1;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2845       break;
2846     case 2:
2847       B->ops->mult    = MatMult_SeqBAIJ_2;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2849       break;
2850     case 3:
2851       B->ops->mult    = MatMult_SeqBAIJ_3;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2853       break;
2854     case 4:
2855       B->ops->mult    = MatMult_SeqBAIJ_4;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2857       break;
2858     case 5:
2859       B->ops->mult    = MatMult_SeqBAIJ_5;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2861       break;
2862     case 6:
2863       B->ops->mult    = MatMult_SeqBAIJ_6;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2865       break;
2866     case 7:
2867       B->ops->mult    = MatMult_SeqBAIJ_7;
2868       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2869       break;
2870     case 15:
2871       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2872       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2873       break;
2874     default:
2875       B->ops->mult    = MatMult_SeqBAIJ_N;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2877       break;
2878     }
2879   }
2880   B->ops->sor = MatSOR_SeqBAIJ;
2881   b->mbs = mbs;
2882   b->nbs = nbs;
2883   if (!skipallocation) {
2884     if (!b->imax) {
2885       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2886       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2887 
2888       b->free_imax_ilen = PETSC_TRUE;
2889     }
2890     /* b->ilen will count nonzeros in each block row so far. */
2891     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2892     if (!nnz) {
2893       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2894       else if (nz < 0) nz = 1;
2895       for (i=0; i<mbs; i++) b->imax[i] = nz;
2896       nz = nz*mbs;
2897     } else {
2898       nz = 0;
2899       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2900     }
2901 
2902     /* allocate the matrix space */
2903     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2904     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2905     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2906     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2907     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2908 
2909     b->singlemalloc = PETSC_TRUE;
2910     b->i[0]         = 0;
2911     for (i=1; i<mbs+1; i++) {
2912       b->i[i] = b->i[i-1] + b->imax[i-1];
2913     }
2914     b->free_a  = PETSC_TRUE;
2915     b->free_ij = PETSC_TRUE;
2916 #if defined(PETSC_THREADCOMM_ACTIVE)
2917     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2918 #endif
2919   } else {
2920     b->free_a  = PETSC_FALSE;
2921     b->free_ij = PETSC_FALSE;
2922   }
2923 
2924   b->bs2              = bs2;
2925   b->mbs              = mbs;
2926   b->nz               = 0;
2927   b->maxnz            = nz;
2928   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2929   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2935 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2936 {
2937   PetscInt       i,m,nz,nz_max=0,*nnz;
2938   PetscScalar    *values=0;
2939   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2944   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2945   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2946   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2947   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2948   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2949   m    = B->rmap->n/bs;
2950 
2951   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2952   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2953   for (i=0; i<m; i++) {
2954     nz = ii[i+1]- ii[i];
2955     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2956     nz_max = PetscMax(nz_max, nz);
2957     nnz[i] = nz;
2958   }
2959   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2960   ierr = PetscFree(nnz);CHKERRQ(ierr);
2961 
2962   values = (PetscScalar*)V;
2963   if (!values) {
2964     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2965   }
2966   for (i=0; i<m; i++) {
2967     PetscInt          ncols  = ii[i+1] - ii[i];
2968     const PetscInt    *icols = jj + ii[i];
2969     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2970     if (!roworiented) {
2971       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2972     } else {
2973       PetscInt j;
2974       for (j=0; j<ncols; j++) {
2975         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2976         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2977       }
2978     }
2979   }
2980   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2981   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2988 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2989 #if defined(PETSC_HAVE_MUMPS)
2990 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2991 #endif
2992 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2993 
2994 /*MC
2995    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2996    block sparse compressed row format.
2997 
2998    Options Database Keys:
2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3000 
3001   Level: beginner
3002 
3003 .seealso: MatCreateSeqBAIJ()
3004 M*/
3005 
3006 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatCreate_SeqBAIJ"
3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3011 {
3012   PetscErrorCode ierr;
3013   PetscMPIInt    size;
3014   Mat_SeqBAIJ    *b;
3015 
3016   PetscFunctionBegin;
3017   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3018   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3019 
3020   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3021   B->data = (void*)b;
3022   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3023 
3024   b->row          = 0;
3025   b->col          = 0;
3026   b->icol         = 0;
3027   b->reallocs     = 0;
3028   b->saved_values = 0;
3029 
3030   b->roworiented        = PETSC_TRUE;
3031   b->nonew              = 0;
3032   b->diag               = 0;
3033   b->solve_work         = 0;
3034   b->mult_work          = 0;
3035   B->spptr              = 0;
3036   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3037   b->keepnonzeropattern = PETSC_FALSE;
3038   b->xtoy               = 0;
3039   b->XtoY               = 0;
3040 
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3044 #if defined(PETSC_HAVE_MUMPS)
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3046 #endif
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3064 {
3065   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3066   PetscErrorCode ierr;
3067   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3068 
3069   PetscFunctionBegin;
3070   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3071 
3072   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073     c->imax           = a->imax;
3074     c->ilen           = a->ilen;
3075     c->free_imax_ilen = PETSC_FALSE;
3076   } else {
3077     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3078     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3079     for (i=0; i<mbs; i++) {
3080       c->imax[i] = a->imax[i];
3081       c->ilen[i] = a->ilen[i];
3082     }
3083     c->free_imax_ilen = PETSC_TRUE;
3084   }
3085 
3086   /* allocate the matrix space */
3087   if (mallocmatspace) {
3088     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3089       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3090       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3091 
3092       c->i            = a->i;
3093       c->j            = a->j;
3094       c->singlemalloc = PETSC_FALSE;
3095       c->free_a       = PETSC_TRUE;
3096       c->free_ij      = PETSC_FALSE;
3097       c->parent       = A;
3098       C->preallocated = PETSC_TRUE;
3099       C->assembled    = PETSC_TRUE;
3100 
3101       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3102       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3103       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3104     } else {
3105       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3106       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3107 
3108       c->singlemalloc = PETSC_TRUE;
3109       c->free_a       = PETSC_TRUE;
3110       c->free_ij      = PETSC_TRUE;
3111 
3112       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3113       if (mbs > 0) {
3114         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3115         if (cpvalues == MAT_COPY_VALUES) {
3116           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3117         } else {
3118           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3119         }
3120       }
3121       C->preallocated = PETSC_TRUE;
3122       C->assembled    = PETSC_TRUE;
3123     }
3124   }
3125 
3126   c->roworiented = a->roworiented;
3127   c->nonew       = a->nonew;
3128 
3129   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3130   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3131 
3132   c->bs2         = a->bs2;
3133   c->mbs         = a->mbs;
3134   c->nbs         = a->nbs;
3135 
3136   if (a->diag) {
3137     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3138       c->diag      = a->diag;
3139       c->free_diag = PETSC_FALSE;
3140     } else {
3141       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3142       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3143       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3144       c->free_diag = PETSC_TRUE;
3145     }
3146   } else c->diag = 0;
3147 
3148   c->nz         = a->nz;
3149   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3150   c->solve_work = 0;
3151   c->mult_work  = 0;
3152 
3153   c->compressedrow.use   = a->compressedrow.use;
3154   c->compressedrow.nrows = a->compressedrow.nrows;
3155   if (a->compressedrow.use) {
3156     i    = a->compressedrow.nrows;
3157     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3158     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3159     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3160     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3161   } else {
3162     c->compressedrow.use    = PETSC_FALSE;
3163     c->compressedrow.i      = NULL;
3164     c->compressedrow.rindex = NULL;
3165   }
3166   C->nonzerostate = A->nonzerostate;
3167 
3168   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3169   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3170   PetscFunctionReturn(0);
3171 }
3172 
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3175 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3176 {
3177   PetscErrorCode ierr;
3178 
3179   PetscFunctionBegin;
3180   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3181   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3182   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3183   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 #undef __FUNCT__
3188 #define __FUNCT__ "MatLoad_SeqBAIJ"
3189 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3190 {
3191   Mat_SeqBAIJ    *a;
3192   PetscErrorCode ierr;
3193   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3194   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3195   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3196   PetscInt       *masked,nmask,tmp,bs2,ishift;
3197   PetscMPIInt    size;
3198   int            fd;
3199   PetscScalar    *aa;
3200   MPI_Comm       comm;
3201 
3202   PetscFunctionBegin;
3203   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3204   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3205   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3206   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3207   bs2  = bs*bs;
3208 
3209   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3210   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3211   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3212   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3213   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3214   M = header[1]; N = header[2]; nz = header[3];
3215 
3216   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3217   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3218 
3219   /*
3220      This code adds extra rows to make sure the number of rows is
3221     divisible by the blocksize
3222   */
3223   mbs        = M/bs;
3224   extra_rows = bs - M + bs*(mbs);
3225   if (extra_rows == bs) extra_rows = 0;
3226   else mbs++;
3227   if (extra_rows) {
3228     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3229   }
3230 
3231   /* Set global sizes if not already set */
3232   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3233     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3234   } else { /* Check if the matrix global sizes are correct */
3235     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3236     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3237       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3238     }
3239     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3240   }
3241 
3242   /* read in row lengths */
3243   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3246 
3247   /* read in column indices */
3248   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3249   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3250   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3251 
3252   /* loop over row lengths determining block row lengths */
3253   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3254   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3255   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3256   rowcount = 0;
3257   nzcount  = 0;
3258   for (i=0; i<mbs; i++) {
3259     nmask = 0;
3260     for (j=0; j<bs; j++) {
3261       kmax = rowlengths[rowcount];
3262       for (k=0; k<kmax; k++) {
3263         tmp = jj[nzcount++]/bs;
3264         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3265       }
3266       rowcount++;
3267     }
3268     browlengths[i] += nmask;
3269     /* zero out the mask elements we set */
3270     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3271   }
3272 
3273   /* Do preallocation  */
3274   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3275   a    = (Mat_SeqBAIJ*)newmat->data;
3276 
3277   /* set matrix "i" values */
3278   a->i[0] = 0;
3279   for (i=1; i<= mbs; i++) {
3280     a->i[i]      = a->i[i-1] + browlengths[i-1];
3281     a->ilen[i-1] = browlengths[i-1];
3282   }
3283   a->nz = 0;
3284   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3285 
3286   /* read in nonzero values */
3287   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3288   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3289   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3290 
3291   /* set "a" and "j" values into matrix */
3292   nzcount = 0; jcount = 0;
3293   for (i=0; i<mbs; i++) {
3294     nzcountb = nzcount;
3295     nmask    = 0;
3296     for (j=0; j<bs; j++) {
3297       kmax = rowlengths[i*bs+j];
3298       for (k=0; k<kmax; k++) {
3299         tmp = jj[nzcount++]/bs;
3300         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3301       }
3302     }
3303     /* sort the masked values */
3304     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3305 
3306     /* set "j" values into matrix */
3307     maskcount = 1;
3308     for (j=0; j<nmask; j++) {
3309       a->j[jcount++]  = masked[j];
3310       mask[masked[j]] = maskcount++;
3311     }
3312     /* set "a" values into matrix */
3313     ishift = bs2*a->i[i];
3314     for (j=0; j<bs; j++) {
3315       kmax = rowlengths[i*bs+j];
3316       for (k=0; k<kmax; k++) {
3317         tmp       = jj[nzcountb]/bs;
3318         block     = mask[tmp] - 1;
3319         point     = jj[nzcountb] - bs*tmp;
3320         idx       = ishift + bs2*block + j + bs*point;
3321         a->a[idx] = (MatScalar)aa[nzcountb++];
3322       }
3323     }
3324     /* zero out the mask elements we set */
3325     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3326   }
3327   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3328 
3329   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3330   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3331   ierr = PetscFree(aa);CHKERRQ(ierr);
3332   ierr = PetscFree(jj);CHKERRQ(ierr);
3333   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3334 
3335   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   PetscFunctionReturn(0);
3338 }
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatCreateSeqBAIJ"
3342 /*@C
3343    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3344    compressed row) format.  For good matrix assembly performance the
3345    user should preallocate the matrix storage by setting the parameter nz
3346    (or the array nnz).  By setting these parameters accurately, performance
3347    during matrix assembly can be increased by more than a factor of 50.
3348 
3349    Collective on MPI_Comm
3350 
3351    Input Parameters:
3352 +  comm - MPI communicator, set to PETSC_COMM_SELF
3353 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3354           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3355 .  m - number of rows
3356 .  n - number of columns
3357 .  nz - number of nonzero blocks  per block row (same for all rows)
3358 -  nnz - array containing the number of nonzero blocks in the various block rows
3359          (possibly different for each block row) or NULL
3360 
3361    Output Parameter:
3362 .  A - the matrix
3363 
3364    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3365    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3366    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3367 
3368    Options Database Keys:
3369 .   -mat_no_unroll - uses code that does not unroll the loops in the
3370                      block calculations (much slower)
3371 .    -mat_block_size - size of the blocks to use
3372 
3373    Level: intermediate
3374 
3375    Notes:
3376    The number of rows and columns must be divisible by blocksize.
3377 
3378    If the nnz parameter is given then the nz parameter is ignored
3379 
3380    A nonzero block is any block that as 1 or more nonzeros in it
3381 
3382    The block AIJ format is fully compatible with standard Fortran 77
3383    storage.  That is, the stored row and column indices can begin at
3384    either one (as in Fortran) or zero.  See the users' manual for details.
3385 
3386    Specify the preallocated storage with either nz or nnz (not both).
3387    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3388    allocation.  See Users-Manual: ch_mat for details.
3389    matrices.
3390 
3391 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3392 @*/
3393 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3394 {
3395   PetscErrorCode ierr;
3396 
3397   PetscFunctionBegin;
3398   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3399   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3400   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3401   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3407 /*@C
3408    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3409    per row in the matrix. For good matrix assembly performance the
3410    user should preallocate the matrix storage by setting the parameter nz
3411    (or the array nnz).  By setting these parameters accurately, performance
3412    during matrix assembly can be increased by more than a factor of 50.
3413 
3414    Collective on MPI_Comm
3415 
3416    Input Parameters:
3417 +  B - the matrix
3418 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3419           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3420 .  nz - number of block nonzeros per block row (same for all rows)
3421 -  nnz - array containing the number of block nonzeros in the various block rows
3422          (possibly different for each block row) or NULL
3423 
3424    Options Database Keys:
3425 .   -mat_no_unroll - uses code that does not unroll the loops in the
3426                      block calculations (much slower)
3427 .    -mat_block_size - size of the blocks to use
3428 
3429    Level: intermediate
3430 
3431    Notes:
3432    If the nnz parameter is given then the nz parameter is ignored
3433 
3434    You can call MatGetInfo() to get information on how effective the preallocation was;
3435    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3436    You can also run with the option -info and look for messages with the string
3437    malloc in them to see if additional memory allocation was needed.
3438 
3439    The block AIJ format is fully compatible with standard Fortran 77
3440    storage.  That is, the stored row and column indices can begin at
3441    either one (as in Fortran) or zero.  See the users' manual for details.
3442 
3443    Specify the preallocated storage with either nz or nnz (not both).
3444    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3445    allocation.  See Users-Manual: ch_mat for details.
3446 
3447 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3448 @*/
3449 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3450 {
3451   PetscErrorCode ierr;
3452 
3453   PetscFunctionBegin;
3454   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3455   PetscValidType(B,1);
3456   PetscValidLogicalCollectiveInt(B,bs,2);
3457   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3458   PetscFunctionReturn(0);
3459 }
3460 
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3463 /*@C
3464    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3465    (the default sequential PETSc format).
3466 
3467    Collective on MPI_Comm
3468 
3469    Input Parameters:
3470 +  B - the matrix
3471 .  i - the indices into j for the start of each local row (starts with zero)
3472 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3473 -  v - optional values in the matrix
3474 
3475    Level: developer
3476 
3477    Notes:
3478    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3479    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3480    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3481    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3482    block column and the second index is over columns within a block.
3483 
3484 .keywords: matrix, aij, compressed row, sparse
3485 
3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3487 @*/
3488 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3489 {
3490   PetscErrorCode ierr;
3491 
3492   PetscFunctionBegin;
3493   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3494   PetscValidType(B,1);
3495   PetscValidLogicalCollectiveInt(B,bs,2);
3496   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3503 /*@
3504      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3505 
3506      Collective on MPI_Comm
3507 
3508    Input Parameters:
3509 +  comm - must be an MPI communicator of size 1
3510 .  bs - size of block
3511 .  m - number of rows
3512 .  n - number of columns
3513 .  i - row indices
3514 .  j - column indices
3515 -  a - matrix values
3516 
3517    Output Parameter:
3518 .  mat - the matrix
3519 
3520    Level: advanced
3521 
3522    Notes:
3523        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3524     once the matrix is destroyed
3525 
3526        You cannot set new nonzero locations into this matrix, that will generate an error.
3527 
3528        The i and j indices are 0 based
3529 
3530        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3531 
3532       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3533       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3534       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3535       with column-major ordering within blocks.
3536 
3537 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3538 
3539 @*/
3540 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3541 {
3542   PetscErrorCode ierr;
3543   PetscInt       ii;
3544   Mat_SeqBAIJ    *baij;
3545 
3546   PetscFunctionBegin;
3547   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3548   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3549 
3550   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3551   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3552   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3553   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3554   baij = (Mat_SeqBAIJ*)(*mat)->data;
3555   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3556   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3557 
3558   baij->i = i;
3559   baij->j = j;
3560   baij->a = a;
3561 
3562   baij->singlemalloc = PETSC_FALSE;
3563   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3564   baij->free_a       = PETSC_FALSE;
3565   baij->free_ij      = PETSC_FALSE;
3566 
3567   for (ii=0; ii<m; ii++) {
3568     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3569 #if defined(PETSC_USE_DEBUG)
3570     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3571 #endif
3572   }
3573 #if defined(PETSC_USE_DEBUG)
3574   for (ii=0; ii<baij->i[m]; ii++) {
3575     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3576     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3577   }
3578 #endif
3579 
3580   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3581   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582   PetscFunctionReturn(0);
3583 }
3584 
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3588 {
3589   PetscErrorCode ierr;
3590 
3591   PetscFunctionBegin;
3592   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
3595