xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f0bfc80ca7e22dfe39d098baf6a1d0eeb11bf71f)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
50         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51       }
52 
53       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
54       diag    += 1;
55       mdiag   += 1;
56     }
57     break;
58   case 2:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 4*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
64       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65       diag    += 4;
66       mdiag   += 4;
67     }
68     break;
69   case 3:
70     for (i=0; i<mbs; i++) {
71       odiag    = v + 9*diag_offset[i];
72       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74       diag[8]  = odiag[8];
75       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77       mdiag[8] = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81       mdiag   += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92       mdiag += 16;
93     }
94     break;
95   case 5:
96     for (i=0; i<mbs; i++) {
97       odiag  = v + 25*diag_offset[i];
98       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
99       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
101       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102       diag  += 25;
103       mdiag += 25;
104     }
105     break;
106   case 6:
107     for (i=0; i<mbs; i++) {
108       odiag  = v + 36*diag_offset[i];
109       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
110       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
112       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113       diag  += 36;
114       mdiag += 36;
115     }
116     break;
117   case 7:
118     for (i=0; i<mbs; i++) {
119       odiag  = v + 49*diag_offset[i];
120       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
121       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
122       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
123       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124       diag  += 49;
125       mdiag += 49;
126     }
127     break;
128   default:
129     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
130     for (i=0; i<mbs; i++) {
131       odiag  = v + bs2*diag_offset[i];
132       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
133       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += bs2;
137       mdiag += bs2;
138     }
139     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
140   }
141   a->idiagvalid = PETSC_TRUE;
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatSOR_SeqBAIJ"
147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148 {
149   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
150   PetscScalar       *x,*work,*w,*workt,*t;
151   const MatScalar   *v,*aa = a->a, *idiag;
152   const PetscScalar *b,*xb;
153   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154   PetscErrorCode    ierr;
155   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
157 
158   PetscFunctionBegin;
159   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160   its = its*lits;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
166 
167   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
168 
169   if (!m) PetscFunctionReturn(0);
170   diag  = a->diag;
171   idiag = a->idiag;
172   k    = PetscMax(A->rmap->n,A->cmap->n);
173   if (!a->mult_work) {
174     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
175   }
176   if (!a->sor_workt) {
177     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
178   }
179   if (!a->sor_work) {
180     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
181   }
182   work = a->mult_work;
183   t    = a->sor_workt;
184   w    = a->sor_work;
185 
186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
188 
189   if (flag & SOR_ZERO_INITIAL_GUESS) {
190     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191       switch (bs) {
192       case 1:
193         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194         t[0] = b[0];
195         i2     = 1;
196         idiag += 1;
197         for (i=1; i<m; i++) {
198           v  = aa + ai[i];
199           vi = aj + ai[i];
200           nz = diag[i] - ai[i];
201           s[0] = b[i2];
202           for (j=0; j<nz; j++) {
203             xw[0] = x[vi[j]];
204             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205           }
206           t[i2] = s[0];
207           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208           x[i2]  = xw[0];
209           idiag += 1;
210           i2    += 1;
211         }
212         break;
213       case 2:
214         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215         t[0] = b[0]; t[1] = b[1];
216         i2     = 2;
217         idiag += 4;
218         for (i=1; i<m; i++) {
219           v  = aa + 4*ai[i];
220           vi = aj + ai[i];
221           nz = diag[i] - ai[i];
222           s[0] = b[i2]; s[1] = b[i2+1];
223           for (j=0; j<nz; j++) {
224             idx = 2*vi[j];
225             it  = 4*j;
226             xw[0] = x[idx]; xw[1] = x[1+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228           }
229           t[i2] = s[0]; t[i2+1] = s[1];
230           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231           x[i2]   = xw[0]; x[i2+1] = xw[1];
232           idiag  += 4;
233           i2     += 2;
234         }
235         break;
236       case 3:
237         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239         i2     = 3;
240         idiag += 9;
241         for (i=1; i<m; i++) {
242           v  = aa + 9*ai[i];
243           vi = aj + ai[i];
244           nz = diag[i] - ai[i];
245           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246           while (nz--) {
247             idx = 3*(*vi++);
248             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250             v  += 9;
251           }
252           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255           idiag  += 9;
256           i2     += 3;
257         }
258         break;
259       case 4:
260         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262         i2     = 4;
263         idiag += 16;
264         for (i=1; i<m; i++) {
265           v  = aa + 16*ai[i];
266           vi = aj + ai[i];
267           nz = diag[i] - ai[i];
268           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269           while (nz--) {
270             idx = 4*(*vi++);
271             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273             v  += 16;
274           }
275           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278           idiag  += 16;
279           i2     += 4;
280         }
281         break;
282       case 5:
283         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285         i2     = 5;
286         idiag += 25;
287         for (i=1; i<m; i++) {
288           v  = aa + 25*ai[i];
289           vi = aj + ai[i];
290           nz = diag[i] - ai[i];
291           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292           while (nz--) {
293             idx = 5*(*vi++);
294             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296             v  += 25;
297           }
298           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301           idiag  += 25;
302           i2     += 5;
303         }
304         break;
305       case 6:
306         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308         i2     = 6;
309         idiag += 36;
310         for (i=1; i<m; i++) {
311           v  = aa + 36*ai[i];
312           vi = aj + ai[i];
313           nz = diag[i] - ai[i];
314           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315           while (nz--) {
316             idx = 6*(*vi++);
317             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
318             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320             v  += 36;
321           }
322           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326           idiag  += 36;
327           i2     += 6;
328         }
329         break;
330       case 7:
331         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334         i2     = 7;
335         idiag += 49;
336         for (i=1; i<m; i++) {
337           v  = aa + 49*ai[i];
338           vi = aj + ai[i];
339           nz = diag[i] - ai[i];
340           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
341           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342           while (nz--) {
343             idx = 7*(*vi++);
344             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
345             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347             v  += 49;
348           }
349           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354           idiag  += 49;
355           i2     += 7;
356         }
357         break;
358       default:
359         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
361         i2     = bs;
362         idiag += bs2;
363         for (i=1; i<m; i++) {
364           v  = aa + bs2*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367 
368           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
369           /* copy all rows of x that are needed into contiguous space */
370           workt = work;
371           for (j=0; j<nz; j++) {
372             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
373             workt += bs;
374           }
375           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
377           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
378 
379           idiag += bs2;
380           i2    += bs;
381         }
382         break;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
386       xb = t;
387     }
388     else xb = b;
389     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390       idiag = a->idiag+bs2*(a->mbs-1);
391       i2 = bs * (m-1);
392       switch (bs) {
393       case 1:
394         s[0]  = xb[i2];
395         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396         x[i2] = xw[0];
397         i2   -= 1;
398         for (i=m-2; i>=0; i--) {
399           v  = aa + (diag[i]+1);
400           vi = aj + diag[i] + 1;
401           nz = ai[i+1] - diag[i] - 1;
402           s[0] = xb[i2];
403           for (j=0; j<nz; j++) {
404             xw[0] = x[vi[j]];
405             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406           }
407           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408           x[i2]  = xw[0];
409           idiag -= 1;
410           i2    -= 1;
411         }
412         break;
413       case 2:
414         s[0]  = xb[i2]; s[1] = xb[i2+1];
415         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416         x[i2] = xw[0]; x[i2+1] = xw[1];
417         i2    -= 2;
418         idiag -= 4;
419         for (i=m-2; i>=0; i--) {
420           v  = aa + 4*(diag[i] + 1);
421           vi = aj + diag[i] + 1;
422           nz = ai[i+1] - diag[i] - 1;
423           s[0] = xb[i2]; s[1] = xb[i2+1];
424           for (j=0; j<nz; j++) {
425             idx = 2*vi[j];
426             it  = 4*j;
427             xw[0] = x[idx]; xw[1] = x[1+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429           }
430           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431           x[i2]   = xw[0]; x[i2+1] = xw[1];
432           idiag  -= 4;
433           i2     -= 2;
434         }
435         break;
436       case 3:
437         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440         i2    -= 3;
441         idiag -= 9;
442         for (i=m-2; i>=0; i--) {
443           v  = aa + 9*(diag[i]+1);
444           vi = aj + diag[i] + 1;
445           nz = ai[i+1] - diag[i] - 1;
446           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447           while (nz--) {
448             idx = 3*(*vi++);
449             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451             v  += 9;
452           }
453           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455           idiag  -= 9;
456           i2     -= 3;
457         }
458         break;
459       case 4:
460         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463         i2    -= 4;
464         idiag -= 16;
465         for (i=m-2; i>=0; i--) {
466           v  = aa + 16*(diag[i]+1);
467           vi = aj + diag[i] + 1;
468           nz = ai[i+1] - diag[i] - 1;
469           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470           while (nz--) {
471             idx = 4*(*vi++);
472             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474             v  += 16;
475           }
476           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478           idiag  -= 16;
479           i2     -= 4;
480         }
481         break;
482       case 5:
483         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486         i2    -= 5;
487         idiag -= 25;
488         for (i=m-2; i>=0; i--) {
489           v  = aa + 25*(diag[i]+1);
490           vi = aj + diag[i] + 1;
491           nz = ai[i+1] - diag[i] - 1;
492           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493           while (nz--) {
494             idx = 5*(*vi++);
495             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497             v  += 25;
498           }
499           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501           idiag  -= 25;
502           i2     -= 5;
503         }
504         break;
505       case 6:
506         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509         i2    -= 6;
510         idiag -= 36;
511         for (i=m-2; i>=0; i--) {
512           v  = aa + 36*(diag[i]+1);
513           vi = aj + diag[i] + 1;
514           nz = ai[i+1] - diag[i] - 1;
515           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516           while (nz--) {
517             idx = 6*(*vi++);
518             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
519             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521             v  += 36;
522           }
523           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525           idiag  -= 36;
526           i2     -= 6;
527         }
528         break;
529       case 7:
530         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535         i2    -= 7;
536         idiag -= 49;
537         for (i=m-2; i>=0; i--) {
538           v  = aa + 49*(diag[i]+1);
539           vi = aj + diag[i] + 1;
540           nz = ai[i+1] - diag[i] - 1;
541           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
542           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543           while (nz--) {
544             idx = 7*(*vi++);
545             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
546             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548             v  += 49;
549           }
550           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553           idiag  -= 49;
554           i2     -= 7;
555         }
556         break;
557       default:
558         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
559         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560         i2    -= bs;
561         idiag -= bs2;
562         for (i=m-2; i>=0; i--) {
563           v  = aa + bs2*(diag[i]+1);
564           vi = aj + diag[i] + 1;
565           nz = ai[i+1] - diag[i] - 1;
566 
567           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
568           /* copy all rows of x that are needed into contiguous space */
569           workt = work;
570           for (j=0; j<nz; j++) {
571             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
572             workt += bs;
573           }
574           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
576 
577           idiag -= bs2;
578           i2    -= bs;
579         }
580         break;
581       }
582       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
583     }
584     its--;
585   }
586   while (its--) {
587     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588       idiag = a->idiag;
589       i2 = 0;
590       switch (bs) {
591       case 1:
592         for (i=0; i<m; i++) {
593           v  = aa + ai[i];
594           vi = aj + ai[i];
595           nz = ai[i+1] - ai[i];
596           s[0] = b[i2];
597           for (j=0; j<nz; j++) {
598             xw[0] = x[vi[j]];
599             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600           }
601           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602           x[i2] += xw[0];
603           idiag += 1;
604           i2    += 1;
605         }
606         break;
607       case 2:
608         for (i=0; i<m; i++) {
609           v  = aa + 4*ai[i];
610           vi = aj + ai[i];
611           nz = ai[i+1] - ai[i];
612           s[0] = b[i2]; s[1] = b[i2+1];
613           for (j=0; j<nz; j++) {
614             idx = 2*vi[j];
615             it  = 4*j;
616             xw[0] = x[idx]; xw[1] = x[1+idx];
617             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618           }
619           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620           x[i2]  += xw[0]; x[i2+1] += xw[1];
621           idiag  += 4;
622           i2     += 2;
623         }
624         break;
625       case 3:
626         for (i=0; i<m; i++) {
627           v  = aa + 9*ai[i];
628           vi = aj + ai[i];
629           nz = ai[i+1] - ai[i];
630           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631           while (nz--) {
632             idx = 3*(*vi++);
633             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635             v  += 9;
636           }
637           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639           idiag  += 9;
640           i2     += 3;
641         }
642         break;
643       case 4:
644         for (i=0; i<m; i++) {
645           v  = aa + 16*ai[i];
646           vi = aj + ai[i];
647           nz = ai[i+1] - ai[i];
648           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649           while (nz--) {
650             idx = 4*(*vi++);
651             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653             v  += 16;
654           }
655           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657           idiag  += 16;
658           i2     += 4;
659         }
660         break;
661       case 5:
662         for (i=0; i<m; i++) {
663           v  = aa + 25*ai[i];
664           vi = aj + ai[i];
665           nz = ai[i+1] - ai[i];
666           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667           while (nz--) {
668             idx = 5*(*vi++);
669             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671             v  += 25;
672           }
673           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675           idiag  += 25;
676           i2     += 5;
677         }
678         break;
679       case 6:
680         for (i=0; i<m; i++) {
681           v  = aa + 36*ai[i];
682           vi = aj + ai[i];
683           nz = ai[i+1] - ai[i];
684           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685           while (nz--) {
686             idx = 6*(*vi++);
687             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
688             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690             v  += 36;
691           }
692           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695           idiag  += 36;
696           i2     += 6;
697         }
698         break;
699       case 7:
700         for (i=0; i<m; i++) {
701           v  = aa + 49*ai[i];
702           vi = aj + ai[i];
703           nz = ai[i+1] - ai[i];
704           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
705           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706           while (nz--) {
707             idx = 7*(*vi++);
708             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
709             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711             v  += 49;
712           }
713           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716           idiag  += 49;
717           i2     += 7;
718         }
719         break;
720       default:
721         for (i=0; i<m; i++) {
722           v  = aa + bs2*ai[i];
723           vi = aj + ai[i];
724           nz = ai[i+1] - ai[i];
725 
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727           /* copy all rows of x that are needed into contiguous space */
728           workt = work;
729           for (j=0; j<nz; j++) {
730             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
731             workt += bs;
732           }
733           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
735 
736           idiag += bs2;
737           i2    += bs;
738         }
739         break;
740       }
741       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
742     }
743     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744       idiag = a->idiag+bs2*(a->mbs-1);
745       i2 = bs * (m-1);
746       switch (bs) {
747       case 1:
748         for (i=m-1; i>=0; i--) {
749           v  = aa + ai[i];
750           vi = aj + ai[i];
751           nz = ai[i+1] - ai[i];
752           s[0] = b[i2];
753           for (j=0; j<nz; j++) {
754             xw[0] = x[vi[j]];
755             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756           }
757           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758           x[i2] += xw[0];
759           idiag -= 1;
760           i2    -= 1;
761         }
762         break;
763       case 2:
764         for (i=m-1; i>=0; i--) {
765           v  = aa + 4*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2]; s[1] = b[i2+1];
769           for (j=0; j<nz; j++) {
770             idx = 2*vi[j];
771             it  = 4*j;
772             xw[0] = x[idx]; xw[1] = x[1+idx];
773             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774           }
775           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776           x[i2]  += xw[0]; x[i2+1] += xw[1];
777           idiag  -= 4;
778           i2     -= 2;
779         }
780         break;
781       case 3:
782         for (i=m-1; i>=0; i--) {
783           v  = aa + 9*ai[i];
784           vi = aj + ai[i];
785           nz = ai[i+1] - ai[i];
786           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787           while (nz--) {
788             idx = 3*(*vi++);
789             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791             v  += 9;
792           }
793           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795           idiag  -= 9;
796           i2     -= 3;
797         }
798         break;
799       case 4:
800         for (i=m-1; i>=0; i--) {
801           v  = aa + 16*ai[i];
802           vi = aj + ai[i];
803           nz = ai[i+1] - ai[i];
804           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805           while (nz--) {
806             idx = 4*(*vi++);
807             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809             v  += 16;
810           }
811           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813           idiag  -= 16;
814           i2     -= 4;
815         }
816         break;
817       case 5:
818         for (i=m-1; i>=0; i--) {
819           v  = aa + 25*ai[i];
820           vi = aj + ai[i];
821           nz = ai[i+1] - ai[i];
822           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823           while (nz--) {
824             idx = 5*(*vi++);
825             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827             v  += 25;
828           }
829           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831           idiag  -= 25;
832           i2     -= 5;
833         }
834         break;
835       case 6:
836         for (i=m-1; i>=0; i--) {
837           v  = aa + 36*ai[i];
838           vi = aj + ai[i];
839           nz = ai[i+1] - ai[i];
840           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841           while (nz--) {
842             idx = 6*(*vi++);
843             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
844             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846             v  += 36;
847           }
848           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851           idiag  -= 36;
852           i2     -= 6;
853         }
854         break;
855       case 7:
856         for (i=m-1; i>=0; i--) {
857           v  = aa + 49*ai[i];
858           vi = aj + ai[i];
859           nz = ai[i+1] - ai[i];
860           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
861           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862           while (nz--) {
863             idx = 7*(*vi++);
864             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
865             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867             v  += 49;
868           }
869           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872           idiag  -= 49;
873           i2     -= 7;
874         }
875         break;
876       default:
877         for (i=m-1; i>=0; i--) {
878           v  = aa + bs2*ai[i];
879           vi = aj + ai[i];
880           nz = ai[i+1] - ai[i];
881 
882           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
883           /* copy all rows of x that are needed into contiguous space */
884           workt = work;
885           for (j=0; j<nz; j++) {
886             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
887             workt += bs;
888           }
889           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
891 
892           idiag -= bs2;
893           i2    -= bs;
894         }
895         break;
896       }
897       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
898     }
899   }
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 
906 /*
907     Special version for direct calls from Fortran (Used in PETSc-fun3d)
908 */
909 #if defined(PETSC_HAVE_FORTRAN_CAPS)
910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912 #define matsetvaluesblocked4_ matsetvaluesblocked4
913 #endif
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "matsetvaluesblocked4_"
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatSetValues4_"
993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994 {
995   Mat         A  = *AA;
996   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998   PetscInt    *ai=a->i,*ailen=a->ilen;
999   PetscInt    *aj=a->j,brow,bcol;
1000   PetscInt    ridx,cidx,lastcol = -1;
1001   MatScalar   *ap,value,*aa=a->a,*bap;
1002 
1003   PetscFunctionBegin;
1004   for (k=0; k<m; k++) { /* loop over added rows */
1005     row  = im[k]; brow = row/4;
1006     rp   = aj + ai[brow];
1007     ap   = aa + 16*ai[brow];
1008     nrow = ailen[brow];
1009     low  = 0;
1010     high = nrow;
1011     for (l=0; l<n; l++) { /* loop over added columns */
1012       col   = in[l]; bcol = col/4;
1013       ridx  = row % 4; cidx = col % 4;
1014       value = v[l + k*n];
1015       if (col <= lastcol)  low = 0;
1016       else                high = nrow;
1017       lastcol = col;
1018       while (high-low > 7) {
1019         t = (low+high)/2;
1020         if (rp[t] > bcol) high = t;
1021         else              low  = t;
1022       }
1023       for (i=low; i<high; i++) {
1024         if (rp[i] > bcol) break;
1025         if (rp[i] == bcol) {
1026           bap   = ap +  16*i + 4*cidx + ridx;
1027           *bap += value;
1028           goto noinsert1;
1029         }
1030       }
1031       N = nrow++ - 1;
1032       high++; /* added new column thus must search to one higher than before */
1033       /* shift up all the later entries in this row */
1034       for (ii=N; ii>=i; ii--) {
1035         rp[ii+1] = rp[ii];
1036         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037       }
1038       if (N>=i) {
1039         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040       }
1041       rp[i]                    = bcol;
1042       ap[16*i + 4*cidx + ridx] = value;
1043 noinsert1:;
1044       low = i;
1045     }
1046     ailen[brow] = nrow;
1047   }
1048   PetscFunctionReturnVoid();
1049 }
1050 
1051 /*
1052      Checks for missing diagonals
1053 */
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086 {
1087   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   PetscInt       i,j,m = a->mbs;
1090 
1091   PetscFunctionBegin;
1092   if (!a->diag) {
1093     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1094     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1095     a->free_diag = PETSC_TRUE;
1096   }
1097   for (i=0; i<m; i++) {
1098     a->diag[i] = a->i[i+1];
1099     for (j=a->i[i]; j<a->i[i+1]; j++) {
1100       if (a->j[j] == i) {
1101         a->diag[i] = j;
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1113 {
1114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1118 
1119   PetscFunctionBegin;
1120   *nn = n;
1121   if (!ia) PetscFunctionReturn(0);
1122   if (symmetric) {
1123     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1124     nz   = tia[n];
1125   } else {
1126     tia = a->i; tja = a->j;
1127   }
1128 
1129   if (!blockcompressed && bs > 1) {
1130     (*nn) *= bs;
1131     /* malloc & create the natural set of indices */
1132     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1133     if (n) {
1134       (*ia)[0] = 0;
1135       for (j=1; j<bs; j++) {
1136         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137       }
1138     }
1139 
1140     for (i=1; i<n; i++) {
1141       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142       for (j=1; j<bs; j++) {
1143         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144       }
1145     }
1146     if (n) {
1147       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148     }
1149 
1150     if (inja) {
1151       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1152       cnt = 0;
1153       for (i=0; i<n; i++) {
1154         for (j=0; j<bs; j++) {
1155           for (k=tia[i]; k<tia[i+1]; k++) {
1156             for (l=0; l<bs; l++) {
1157               (*ja)[cnt++] = bs*tja[k] + l;
1158             }
1159           }
1160         }
1161       }
1162     }
1163 
1164     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165       ierr = PetscFree(tia);CHKERRQ(ierr);
1166       ierr = PetscFree(tja);CHKERRQ(ierr);
1167     }
1168   } else if (oshift == 1) {
1169     if (symmetric) {
1170       nz = tia[A->rmap->n/bs];
1171       /*  add 1 to i and j indices */
1172       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173       *ia = tia;
1174       if (ja) {
1175         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176         *ja = tja;
1177       }
1178     } else {
1179       nz = a->i[A->rmap->n/bs];
1180       /* malloc space and  add 1 to i and j indices */
1181       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1182       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183       if (ja) {
1184         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1185         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186       }
1187     }
1188   } else {
1189     *ia = tia;
1190     if (ja) *ja = tja;
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1198 {
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   if (!ia) PetscFunctionReturn(0);
1203   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204     ierr = PetscFree(*ia);CHKERRQ(ierr);
1205     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213 {
1214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220 #endif
1221   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1222   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1224   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1225   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1226   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1228   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1229   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1230   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1231   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1232   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1233   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1234 
1235   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1236   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1237   ierr = PetscFree(A->data);CHKERRQ(ierr);
1238 
1239   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256 {
1257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   switch (op) {
1262   case MAT_ROW_ORIENTED:
1263     a->roworiented = flg;
1264     break;
1265   case MAT_KEEP_NONZERO_PATTERN:
1266     a->keepnonzeropattern = flg;
1267     break;
1268   case MAT_NEW_NONZERO_LOCATIONS:
1269     a->nonew = (flg ? 0 : 1);
1270     break;
1271   case MAT_NEW_NONZERO_LOCATION_ERR:
1272     a->nonew = (flg ? -1 : 0);
1273     break;
1274   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275     a->nonew = (flg ? -2 : 0);
1276     break;
1277   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278     a->nounused = (flg ? -1 : 0);
1279     break;
1280   case MAT_NEW_DIAGONALS:
1281   case MAT_IGNORE_OFF_PROC_ENTRIES:
1282   case MAT_USE_HASH_TABLE:
1283     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1284     break;
1285   case MAT_SPD:
1286   case MAT_SYMMETRIC:
1287   case MAT_STRUCTURALLY_SYMMETRIC:
1288   case MAT_HERMITIAN:
1289   case MAT_SYMMETRY_ETERNAL:
1290     /* These options are handled directly by MatSetOption() */
1291     break;
1292   default:
1293     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302 {
1303   PetscErrorCode ierr;
1304   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305   MatScalar      *aa_i;
1306   PetscScalar    *v_i;
1307 
1308   PetscFunctionBegin;
1309   bs  = A->rmap->bs;
1310   bs2 = bs*bs;
1311   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1312 
1313   bn  = row/bs;   /* Block number */
1314   bp  = row % bs; /* Block Position */
1315   M   = ai[bn+1] - ai[bn];
1316   *nz = bs*M;
1317 
1318   if (v) {
1319     *v = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         v_i  = *v + i*bs;
1324         aa_i = aa + bs2*(ai[bn] + i);
1325         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326       }
1327     }
1328   }
1329 
1330   if (idx) {
1331     *idx = 0;
1332     if (*nz) {
1333       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1334       for (i=0; i<M; i++) { /* for each block in the block row */
1335         idx_i = *idx + i*bs;
1336         itmp  = bs*aj[ai[bn] + i];
1337         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338       }
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359 {
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1364   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373 {
1374   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1375   Mat            C;
1376   PetscErrorCode ierr;
1377   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378   PetscInt       *rows,*cols,bs2=a->bs2;
1379   MatScalar      *array;
1380 
1381   PetscFunctionBegin;
1382   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1385 
1386     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1388     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1389     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1390     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1391     ierr = PetscFree(col);CHKERRQ(ierr);
1392   } else {
1393     C = *B;
1394   }
1395 
1396   array = a->a;
1397   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1398   for (i=0; i<mbs; i++) {
1399     cols[0] = i*bs;
1400     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401     len = ai[i+1] - ai[i];
1402     for (j=0; j<len; j++) {
1403       rows[0] = (*aj++)*bs;
1404       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1406       array += bs2;
1407     }
1408   }
1409   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413 
1414   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415     *B = C;
1416   } else {
1417     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1425 {
1426   PetscErrorCode ierr;
1427   Mat            Btrans;
1428 
1429   PetscFunctionBegin;
1430   *f   = PETSC_FALSE;
1431   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1432   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1433   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440 {
1441   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1442   PetscErrorCode ierr;
1443   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444   int            fd;
1445   PetscScalar    *aa;
1446   FILE           *file;
1447 
1448   PetscFunctionBegin;
1449   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1450   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1451   col_lens[0] = MAT_FILE_CLASSID;
1452 
1453   col_lens[1] = A->rmap->N;
1454   col_lens[2] = A->cmap->n;
1455   col_lens[3] = a->nz*bs2;
1456 
1457   /* store lengths of each row and write (including header) to file */
1458   count = 0;
1459   for (i=0; i<a->mbs; i++) {
1460     for (j=0; j<bs; j++) {
1461       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1465   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1466 
1467   /* store column indices (zero start index) */
1468   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1469   count = 0;
1470   for (i=0; i<a->mbs; i++) {
1471     for (j=0; j<bs; j++) {
1472       for (k=a->i[i]; k<a->i[i+1]; k++) {
1473         for (l=0; l<bs; l++) {
1474           jj[count++] = bs*a->j[k] + l;
1475         }
1476       }
1477     }
1478   }
1479   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1480   ierr = PetscFree(jj);CHKERRQ(ierr);
1481 
1482   /* store nonzero values */
1483   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1484   count = 0;
1485   for (i=0; i<a->mbs; i++) {
1486     for (j=0; j<bs; j++) {
1487       for (k=a->i[i]; k<a->i[i+1]; k++) {
1488         for (l=0; l<bs; l++) {
1489           aa[count++] = a->a[bs2*k + l*bs + j];
1490         }
1491       }
1492     }
1493   }
1494   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1495   ierr = PetscFree(aa);CHKERRQ(ierr);
1496 
1497   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1498   if (file) {
1499     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507 {
1508   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1509   PetscErrorCode    ierr;
1510   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511   PetscViewerFormat format;
1512 
1513   PetscFunctionBegin;
1514   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1515   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518     const char *matname;
1519     Mat        aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1522     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1523     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1524     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526       PetscFunctionReturn(0);
1527   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     for (i=0; i<a->mbs; i++) {
1530       for (j=0; j<bs; j++) {
1531         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1532         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533           for (l=0; l<bs; l++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             }
1544 #else
1545             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547             }
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   } else {
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1557     for (i=0; i<a->mbs; i++) {
1558       for (j=0; j<bs; j++) {
1559         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1560         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561           for (l=0; l<bs; l++) {
1562 #if defined(PETSC_USE_COMPLEX)
1563             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             }
1572 #else
1573             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1574 #endif
1575           }
1576         }
1577         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1578       }
1579     }
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1581   }
1582   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include <petscdraw.h>
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590 {
1591   Mat               A = (Mat) Aa;
1592   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593   PetscErrorCode    ierr;
1594   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596   MatScalar         *aa;
1597   PetscViewer       viewer;
1598   PetscViewerFormat format;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1602   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1603 
1604   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1605 
1606   /* loop over matrix elements drawing boxes */
1607 
1608   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609     color = PETSC_DRAW_BLUE;
1610     for (i=0,row=0; i<mbs; i++,row+=bs) {
1611       for (j=a->i[i]; j<a->i[i+1]; j++) {
1612         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1613         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1614         aa  = a->a + j*bs2;
1615         for (k=0; k<bs; k++) {
1616           for (l=0; l<bs; l++) {
1617             if (PetscRealPart(*aa++) >=  0.) continue;
1618             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1619           }
1620         }
1621       }
1622     }
1623     color = PETSC_DRAW_CYAN;
1624     for (i=0,row=0; i<mbs; i++,row+=bs) {
1625       for (j=a->i[i]; j<a->i[i+1]; j++) {
1626         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1627         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1628         aa  = a->a + j*bs2;
1629         for (k=0; k<bs; k++) {
1630           for (l=0; l<bs; l++) {
1631             if (PetscRealPart(*aa++) != 0.) continue;
1632             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1633           }
1634         }
1635       }
1636     }
1637     color = PETSC_DRAW_RED;
1638     for (i=0,row=0; i<mbs; i++,row+=bs) {
1639       for (j=a->i[i]; j<a->i[i+1]; j++) {
1640         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642         aa  = a->a + j*bs2;
1643         for (k=0; k<bs; k++) {
1644           for (l=0; l<bs; l++) {
1645             if (PetscRealPart(*aa++) <= 0.) continue;
1646             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647           }
1648         }
1649       }
1650     }
1651   } else {
1652     /* use contour shading to indicate magnitude of values */
1653     /* first determine max of all nonzero values */
1654     PetscReal minv = 0.0, maxv = 0.0;
1655     PetscDraw popup;
1656 
1657     for (i=0; i<a->nz*a->bs2; i++) {
1658       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1659     }
1660     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1661     if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1662     for (i=0,row=0; i<mbs; i++,row+=bs) {
1663       for (j=a->i[i]; j<a->i[i+1]; j++) {
1664         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1665         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1666         aa  = a->a + j*bs2;
1667         for (k=0; k<bs; k++) {
1668           for (l=0; l<bs; l++) {
1669             color = PetscDrawRealToColor(PetscAbsScalar(*aa++),minv,maxv);
1670             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1671           }
1672         }
1673       }
1674     }
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1681 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1682 {
1683   PetscErrorCode ierr;
1684   PetscReal      xl,yl,xr,yr,w,h;
1685   PetscDraw      draw;
1686   PetscBool      isnull;
1687 
1688   PetscFunctionBegin;
1689   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1690   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1691 
1692   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1693   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1694   xr  += w;    yr += h;  xl = -w;     yl = -h;
1695   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1696   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatView_SeqBAIJ"
1703 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1704 {
1705   PetscErrorCode ierr;
1706   PetscBool      iascii,isbinary,isdraw;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1712   if (iascii) {
1713     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1714   } else if (isbinary) {
1715     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1716   } else if (isdraw) {
1717     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1718   } else {
1719     Mat B;
1720     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1721     ierr = MatView(B,viewer);CHKERRQ(ierr);
1722     ierr = MatDestroy(&B);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1731 {
1732   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1733   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1734   PetscInt    *ai = a->i,*ailen = a->ilen;
1735   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1736   MatScalar   *ap,*aa = a->a;
1737 
1738   PetscFunctionBegin;
1739   for (k=0; k<m; k++) { /* loop over rows */
1740     row = im[k]; brow = row/bs;
1741     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1742     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1743     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1744     nrow = ailen[brow];
1745     for (l=0; l<n; l++) { /* loop over columns */
1746       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1747       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1748       col  = in[l];
1749       bcol = col/bs;
1750       cidx = col%bs;
1751       ridx = row%bs;
1752       high = nrow;
1753       low  = 0; /* assume unsorted */
1754       while (high-low > 5) {
1755         t = (low+high)/2;
1756         if (rp[t] > bcol) high = t;
1757         else             low  = t;
1758       }
1759       for (i=low; i<high; i++) {
1760         if (rp[i] > bcol) break;
1761         if (rp[i] == bcol) {
1762           *v++ = ap[bs2*i+bs*cidx+ridx];
1763           goto finished;
1764         }
1765       }
1766       *v++ = 0.0;
1767 finished:;
1768     }
1769   }
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1775 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1776 {
1777   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1778   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1779   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1780   PetscErrorCode    ierr;
1781   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1782   PetscBool         roworiented=a->roworiented;
1783   const PetscScalar *value     = v;
1784   MatScalar         *ap,*aa = a->a,*bap;
1785 
1786   PetscFunctionBegin;
1787   if (roworiented) {
1788     stepval = (n-1)*bs;
1789   } else {
1790     stepval = (m-1)*bs;
1791   }
1792   for (k=0; k<m; k++) { /* loop over added rows */
1793     row = im[k];
1794     if (row < 0) continue;
1795 #if defined(PETSC_USE_DEBUG)
1796     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1797 #endif
1798     rp   = aj + ai[row];
1799     ap   = aa + bs2*ai[row];
1800     rmax = imax[row];
1801     nrow = ailen[row];
1802     low  = 0;
1803     high = nrow;
1804     for (l=0; l<n; l++) { /* loop over added columns */
1805       if (in[l] < 0) continue;
1806 #if defined(PETSC_USE_DEBUG)
1807       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1808 #endif
1809       col = in[l];
1810       if (roworiented) {
1811         value = v + (k*(stepval+bs) + l)*bs;
1812       } else {
1813         value = v + (l*(stepval+bs) + k)*bs;
1814       }
1815       if (col <= lastcol) low = 0;
1816       else high = nrow;
1817       lastcol = col;
1818       while (high-low > 7) {
1819         t = (low+high)/2;
1820         if (rp[t] > col) high = t;
1821         else             low  = t;
1822       }
1823       for (i=low; i<high; i++) {
1824         if (rp[i] > col) break;
1825         if (rp[i] == col) {
1826           bap = ap +  bs2*i;
1827           if (roworiented) {
1828             if (is == ADD_VALUES) {
1829               for (ii=0; ii<bs; ii++,value+=stepval) {
1830                 for (jj=ii; jj<bs2; jj+=bs) {
1831                   bap[jj] += *value++;
1832                 }
1833               }
1834             } else {
1835               for (ii=0; ii<bs; ii++,value+=stepval) {
1836                 for (jj=ii; jj<bs2; jj+=bs) {
1837                   bap[jj] = *value++;
1838                 }
1839               }
1840             }
1841           } else {
1842             if (is == ADD_VALUES) {
1843               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1844                 for (jj=0; jj<bs; jj++) {
1845                   bap[jj] += value[jj];
1846                 }
1847                 bap += bs;
1848               }
1849             } else {
1850               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1851                 for (jj=0; jj<bs; jj++) {
1852                   bap[jj]  = value[jj];
1853                 }
1854                 bap += bs;
1855               }
1856             }
1857           }
1858           goto noinsert2;
1859         }
1860       }
1861       if (nonew == 1) goto noinsert2;
1862       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1863       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1864       N = nrow++ - 1; high++;
1865       /* shift up all the later entries in this row */
1866       for (ii=N; ii>=i; ii--) {
1867         rp[ii+1] = rp[ii];
1868         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1869       }
1870       if (N >= i) {
1871         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       rp[i] = col;
1874       bap   = ap +  bs2*i;
1875       if (roworiented) {
1876         for (ii=0; ii<bs; ii++,value+=stepval) {
1877           for (jj=ii; jj<bs2; jj+=bs) {
1878             bap[jj] = *value++;
1879           }
1880         }
1881       } else {
1882         for (ii=0; ii<bs; ii++,value+=stepval) {
1883           for (jj=0; jj<bs; jj++) {
1884             *bap++ = *value++;
1885           }
1886         }
1887       }
1888 noinsert2:;
1889       low = i;
1890     }
1891     ailen[row] = nrow;
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1898 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1899 {
1900   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1901   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1902   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1903   PetscErrorCode ierr;
1904   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1905   MatScalar      *aa  = a->a,*ap;
1906   PetscReal      ratio=0.6;
1907 
1908   PetscFunctionBegin;
1909   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1910 
1911   if (m) rmax = ailen[0];
1912   for (i=1; i<mbs; i++) {
1913     /* move each row back by the amount of empty slots (fshift) before it*/
1914     fshift += imax[i-1] - ailen[i-1];
1915     rmax    = PetscMax(rmax,ailen[i]);
1916     if (fshift) {
1917       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1918       N  = ailen[i];
1919       for (j=0; j<N; j++) {
1920         ip[j-fshift] = ip[j];
1921 
1922         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1923       }
1924     }
1925     ai[i] = ai[i-1] + ailen[i-1];
1926   }
1927   if (mbs) {
1928     fshift += imax[mbs-1] - ailen[mbs-1];
1929     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1930   }
1931 
1932   /* reset ilen and imax for each row */
1933   a->nonzerorowcnt = 0;
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1951 
1952   A->info.mallocs    += a->reallocs;
1953   a->reallocs         = 0;
1954   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1955   a->rmax             = rmax;
1956 
1957   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 /*
1962    This function returns an array of flags which indicate the locations of contiguous
1963    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1964    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1965    Assume: sizes should be long enough to hold all the values.
1966 */
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1970 {
1971   PetscInt  i,j,k,row;
1972   PetscBool flg;
1973 
1974   PetscFunctionBegin;
1975   for (i=0,j=0; i<n; j++) {
1976     row = idx[i];
1977     if (row%bs!=0) { /* Not the begining of a block */
1978       sizes[j] = 1;
1979       i++;
1980     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1981       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1982       i++;
1983     } else { /* Begining of the block, so check if the complete block exists */
1984       flg = PETSC_TRUE;
1985       for (k=1; k<bs; k++) {
1986         if (row+k != idx[i+k]) { /* break in the block */
1987           flg = PETSC_FALSE;
1988           break;
1989         }
1990       }
1991       if (flg) { /* No break in the bs */
1992         sizes[j] = bs;
1993         i       += bs;
1994       } else {
1995         sizes[j] = 1;
1996         i++;
1997       }
1998     }
1999   }
2000   *bs_max = j;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2007 {
2008   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2009   PetscErrorCode    ierr;
2010   PetscInt          i,j,k,count,*rows;
2011   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2012   PetscScalar       zero = 0.0;
2013   MatScalar         *aa;
2014   const PetscScalar *xx;
2015   PetscScalar       *bb;
2016 
2017   PetscFunctionBegin;
2018   /* fix right hand side if needed */
2019   if (x && b) {
2020     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2021     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2022     for (i=0; i<is_n; i++) {
2023       bb[is_idx[i]] = diag*xx[is_idx[i]];
2024     }
2025     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2026     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2027   }
2028 
2029   /* Make a copy of the IS and  sort it */
2030   /* allocate memory for rows,sizes */
2031   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2032 
2033   /* copy IS values to rows, and sort them */
2034   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2035   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2036 
2037   if (baij->keepnonzeropattern) {
2038     for (i=0; i<is_n; i++) sizes[i] = 1;
2039     bs_max          = is_n;
2040   } else {
2041     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2042     A->nonzerostate++;
2043   }
2044 
2045   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2046     row = rows[j];
2047     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2048     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2049     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2050     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2051       if (diag != (PetscScalar)0.0) {
2052         if (baij->ilen[row/bs] > 0) {
2053           baij->ilen[row/bs]       = 1;
2054           baij->j[baij->i[row/bs]] = row/bs;
2055 
2056           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2057         }
2058         /* Now insert all the diagonal values for this bs */
2059         for (k=0; k<bs; k++) {
2060           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2061         }
2062       } else { /* (diag == 0.0) */
2063         baij->ilen[row/bs] = 0;
2064       } /* end (diag == 0.0) */
2065     } else { /* (sizes[i] != bs) */
2066 #if defined(PETSC_USE_DEBUG)
2067       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2068 #endif
2069       for (k=0; k<count; k++) {
2070         aa[0] =  zero;
2071         aa   += bs;
2072       }
2073       if (diag != (PetscScalar)0.0) {
2074         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2075       }
2076     }
2077   }
2078 
2079   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2086 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2087 {
2088   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2089   PetscErrorCode    ierr;
2090   PetscInt          i,j,k,count;
2091   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2092   PetscScalar       zero = 0.0;
2093   MatScalar         *aa;
2094   const PetscScalar *xx;
2095   PetscScalar       *bb;
2096   PetscBool         *zeroed,vecs = PETSC_FALSE;
2097 
2098   PetscFunctionBegin;
2099   /* fix right hand side if needed */
2100   if (x && b) {
2101     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2102     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2103     vecs = PETSC_TRUE;
2104   }
2105 
2106   /* zero the columns */
2107   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2108   for (i=0; i<is_n; i++) {
2109     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2110     zeroed[is_idx[i]] = PETSC_TRUE;
2111   }
2112   for (i=0; i<A->rmap->N; i++) {
2113     if (!zeroed[i]) {
2114       row = i/bs;
2115       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2116         for (k=0; k<bs; k++) {
2117           col = bs*baij->j[j] + k;
2118           if (zeroed[col]) {
2119             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2120             if (vecs) bb[i] -= aa[0]*xx[col];
2121             aa[0] = 0.0;
2122           }
2123         }
2124       }
2125     } else if (vecs) bb[i] = diag*xx[i];
2126   }
2127   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2128   if (vecs) {
2129     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2130     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2131   }
2132 
2133   /* zero the rows */
2134   for (i=0; i<is_n; i++) {
2135     row   = is_idx[i];
2136     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2137     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2138     for (k=0; k<count; k++) {
2139       aa[0] =  zero;
2140       aa   += bs;
2141     }
2142     if (diag != (PetscScalar)0.0) {
2143       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2144     }
2145   }
2146   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2152 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2153 {
2154   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2155   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2156   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2157   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2158   PetscErrorCode ierr;
2159   PetscInt       ridx,cidx,bs2=a->bs2;
2160   PetscBool      roworiented=a->roworiented;
2161   MatScalar      *ap,value,*aa=a->a,*bap;
2162 
2163   PetscFunctionBegin;
2164   for (k=0; k<m; k++) { /* loop over added rows */
2165     row  = im[k];
2166     brow = row/bs;
2167     if (row < 0) continue;
2168 #if defined(PETSC_USE_DEBUG)
2169     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2170 #endif
2171     rp   = aj + ai[brow];
2172     ap   = aa + bs2*ai[brow];
2173     rmax = imax[brow];
2174     nrow = ailen[brow];
2175     low  = 0;
2176     high = nrow;
2177     for (l=0; l<n; l++) { /* loop over added columns */
2178       if (in[l] < 0) continue;
2179 #if defined(PETSC_USE_DEBUG)
2180       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2181 #endif
2182       col  = in[l]; bcol = col/bs;
2183       ridx = row % bs; cidx = col % bs;
2184       if (roworiented) {
2185         value = v[l + k*n];
2186       } else {
2187         value = v[k + l*m];
2188       }
2189       if (col <= lastcol) low = 0; else high = nrow;
2190       lastcol = col;
2191       while (high-low > 7) {
2192         t = (low+high)/2;
2193         if (rp[t] > bcol) high = t;
2194         else              low  = t;
2195       }
2196       for (i=low; i<high; i++) {
2197         if (rp[i] > bcol) break;
2198         if (rp[i] == bcol) {
2199           bap = ap +  bs2*i + bs*cidx + ridx;
2200           if (is == ADD_VALUES) *bap += value;
2201           else                  *bap  = value;
2202           goto noinsert1;
2203         }
2204       }
2205       if (nonew == 1) goto noinsert1;
2206       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2207       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2208       N = nrow++ - 1; high++;
2209       /* shift up all the later entries in this row */
2210       for (ii=N; ii>=i; ii--) {
2211         rp[ii+1] = rp[ii];
2212         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2213       }
2214       if (N>=i) {
2215         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2216       }
2217       rp[i]                      = bcol;
2218       ap[bs2*i + bs*cidx + ridx] = value;
2219       a->nz++;
2220       A->nonzerostate++;
2221 noinsert1:;
2222       low = i;
2223     }
2224     ailen[brow] = nrow;
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2232 {
2233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2234   Mat            outA;
2235   PetscErrorCode ierr;
2236   PetscBool      row_identity,col_identity;
2237 
2238   PetscFunctionBegin;
2239   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2240   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2241   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2242   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2243 
2244   outA            = inA;
2245   inA->factortype = MAT_FACTOR_LU;
2246 
2247   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2248 
2249   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2250   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2251   a->row = row;
2252   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2254   a->col = col;
2255 
2256   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2257   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2258   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2259   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2260 
2261   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2262   if (!a->solve_work) {
2263     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2264     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2265   }
2266   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2272 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2273 {
2274   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2275   PetscInt    i,nz,mbs;
2276 
2277   PetscFunctionBegin;
2278   nz  = baij->maxnz;
2279   mbs = baij->mbs;
2280   for (i=0; i<nz; i++) {
2281     baij->j[i] = indices[i];
2282   }
2283   baij->nz = nz;
2284   for (i=0; i<mbs; i++) {
2285     baij->ilen[i] = baij->imax[i];
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2292 /*@
2293     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2294        in the matrix.
2295 
2296   Input Parameters:
2297 +  mat - the SeqBAIJ matrix
2298 -  indices - the column indices
2299 
2300   Level: advanced
2301 
2302   Notes:
2303     This can be called if you have precomputed the nonzero structure of the
2304   matrix and want to provide it to the matrix object to improve the performance
2305   of the MatSetValues() operation.
2306 
2307     You MUST have set the correct numbers of nonzeros per row in the call to
2308   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2309 
2310     MUST be called before any calls to MatSetValues();
2311 
2312 @*/
2313 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2314 {
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2319   PetscValidPointer(indices,2);
2320   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2326 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2327 {
2328   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2331   PetscReal      atmp;
2332   PetscScalar    *x,zero = 0.0;
2333   MatScalar      *aa;
2334   PetscInt       ncols,brow,krow,kcol;
2335 
2336   PetscFunctionBegin;
2337   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2338   bs  = A->rmap->bs;
2339   aa  = a->a;
2340   ai  = a->i;
2341   aj  = a->j;
2342   mbs = a->mbs;
2343 
2344   ierr = VecSet(v,zero);CHKERRQ(ierr);
2345   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2346   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2347   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2348   for (i=0; i<mbs; i++) {
2349     ncols = ai[1] - ai[0]; ai++;
2350     brow  = bs*i;
2351     for (j=0; j<ncols; j++) {
2352       for (kcol=0; kcol<bs; kcol++) {
2353         for (krow=0; krow<bs; krow++) {
2354           atmp = PetscAbsScalar(*aa);aa++;
2355           row  = brow + krow;   /* row index */
2356           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2357         }
2358       }
2359       aj++;
2360     }
2361   }
2362   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatCopy_SeqBAIJ"
2368 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2369 {
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   /* If the two matrices have the same copy implementation, use fast copy. */
2374   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2375     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2376     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2377     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2378 
2379     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2380     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2381     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2382   } else {
2383     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2384   }
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2390 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2391 {
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2401 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2402 {
2403   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2404 
2405   PetscFunctionBegin;
2406   *array = a->a;
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2412 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2413 {
2414   PetscFunctionBegin;
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2420 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2421 {
2422   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2423   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2424   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2425   PetscErrorCode ierr;
2426 
2427   PetscFunctionBegin;
2428   /* Set the number of nonzeros in the new matrix */
2429   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2436 {
2437   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2438   PetscErrorCode ierr;
2439   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2440   PetscBLASInt   one=1;
2441 
2442   PetscFunctionBegin;
2443   if (str == SAME_NONZERO_PATTERN) {
2444     PetscScalar  alpha = a;
2445     PetscBLASInt bnz;
2446     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2447     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2448     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2449   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2450     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2451   } else {
2452     Mat      B;
2453     PetscInt *nnz;
2454     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2455     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2456     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2457     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2458     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2459     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2460     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2461     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2462     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2463     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2464     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2465     ierr = PetscFree(nnz);CHKERRQ(ierr);
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2472 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2473 {
2474   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2475   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2476   MatScalar   *aa = a->a;
2477 
2478   PetscFunctionBegin;
2479   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2485 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2486 {
2487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2489   MatScalar   *aa = a->a;
2490 
2491   PetscFunctionBegin;
2492   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2498 /*
2499     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2500 */
2501 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2502 {
2503   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2504   PetscErrorCode ierr;
2505   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2506   PetscInt       nz = a->i[m],row,*jj,mr,col;
2507 
2508   PetscFunctionBegin;
2509   *nn = n;
2510   if (!ia) PetscFunctionReturn(0);
2511   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2512   else {
2513     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2514     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2515     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2516     jj   = a->j;
2517     for (i=0; i<nz; i++) {
2518       collengths[jj[i]]++;
2519     }
2520     cia[0] = oshift;
2521     for (i=0; i<n; i++) {
2522       cia[i+1] = cia[i] + collengths[i];
2523     }
2524     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2525     jj   = a->j;
2526     for (row=0; row<m; row++) {
2527       mr = a->i[row+1] - a->i[row];
2528       for (i=0; i<mr; i++) {
2529         col = *jj++;
2530 
2531         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2532       }
2533     }
2534     ierr = PetscFree(collengths);CHKERRQ(ierr);
2535     *ia  = cia; *ja = cja;
2536   }
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2542 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2543 {
2544   PetscErrorCode ierr;
2545 
2546   PetscFunctionBegin;
2547   if (!ia) PetscFunctionReturn(0);
2548   ierr = PetscFree(*ia);CHKERRQ(ierr);
2549   ierr = PetscFree(*ja);CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*
2554  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2555  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2556  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2557  */
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2560 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2561 {
2562   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2563   PetscErrorCode ierr;
2564   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2565   PetscInt       nz = a->i[m],row,*jj,mr,col;
2566   PetscInt       *cspidx;
2567 
2568   PetscFunctionBegin;
2569   *nn = n;
2570   if (!ia) PetscFunctionReturn(0);
2571 
2572   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2573   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2574   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2575   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2576   jj   = a->j;
2577   for (i=0; i<nz; i++) {
2578     collengths[jj[i]]++;
2579   }
2580   cia[0] = oshift;
2581   for (i=0; i<n; i++) {
2582     cia[i+1] = cia[i] + collengths[i];
2583   }
2584   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2585   jj   = a->j;
2586   for (row=0; row<m; row++) {
2587     mr = a->i[row+1] - a->i[row];
2588     for (i=0; i<mr; i++) {
2589       col = *jj++;
2590       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2591       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2592     }
2593   }
2594   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2595   *ia    = cia; *ja = cja;
2596   *spidx = cspidx;
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2602 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2603 {
2604   PetscErrorCode ierr;
2605 
2606   PetscFunctionBegin;
2607   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2608   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatShift_SeqBAIJ"
2614 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2615 {
2616   PetscErrorCode ierr;
2617   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2618 
2619   PetscFunctionBegin;
2620   if (!Y->preallocated || !aij->nz) {
2621     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2622   }
2623   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 /* -------------------------------------------------------------------*/
2628 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2629                                        MatGetRow_SeqBAIJ,
2630                                        MatRestoreRow_SeqBAIJ,
2631                                        MatMult_SeqBAIJ_N,
2632                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2633                                        MatMultTranspose_SeqBAIJ,
2634                                        MatMultTransposeAdd_SeqBAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                /* 10*/ 0,
2639                                        MatLUFactor_SeqBAIJ,
2640                                        0,
2641                                        0,
2642                                        MatTranspose_SeqBAIJ,
2643                                /* 15*/ MatGetInfo_SeqBAIJ,
2644                                        MatEqual_SeqBAIJ,
2645                                        MatGetDiagonal_SeqBAIJ,
2646                                        MatDiagonalScale_SeqBAIJ,
2647                                        MatNorm_SeqBAIJ,
2648                                /* 20*/ 0,
2649                                        MatAssemblyEnd_SeqBAIJ,
2650                                        MatSetOption_SeqBAIJ,
2651                                        MatZeroEntries_SeqBAIJ,
2652                                /* 24*/ MatZeroRows_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                /* 29*/ MatSetUp_SeqBAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                /* 34*/ MatDuplicate_SeqBAIJ,
2663                                        0,
2664                                        0,
2665                                        MatILUFactor_SeqBAIJ,
2666                                        0,
2667                                /* 39*/ MatAXPY_SeqBAIJ,
2668                                        MatGetSubMatrices_SeqBAIJ,
2669                                        MatIncreaseOverlap_SeqBAIJ,
2670                                        MatGetValues_SeqBAIJ,
2671                                        MatCopy_SeqBAIJ,
2672                                /* 44*/ 0,
2673                                        MatScale_SeqBAIJ,
2674                                        MatShift_SeqBAIJ,
2675                                        0,
2676                                        MatZeroRowsColumns_SeqBAIJ,
2677                                /* 49*/ 0,
2678                                        MatGetRowIJ_SeqBAIJ,
2679                                        MatRestoreRowIJ_SeqBAIJ,
2680                                        MatGetColumnIJ_SeqBAIJ,
2681                                        MatRestoreColumnIJ_SeqBAIJ,
2682                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        MatSetValuesBlocked_SeqBAIJ,
2687                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2688                                        MatDestroy_SeqBAIJ,
2689                                        MatView_SeqBAIJ,
2690                                        0,
2691                                        0,
2692                                /* 64*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2698                                        0,
2699                                        MatConvert_Basic,
2700                                        0,
2701                                        0,
2702                                /* 74*/ 0,
2703                                        MatFDColoringApply_BAIJ,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 79*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatLoad_SeqBAIJ,
2712                                /* 84*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 89*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /* 94*/ 0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                /* 99*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                /*104*/ 0,
2733                                        MatRealPart_SeqBAIJ,
2734                                        MatImaginaryPart_SeqBAIJ,
2735                                        0,
2736                                        0,
2737                                /*109*/ 0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                        MatMissingDiagonal_SeqBAIJ,
2742                                /*114*/ 0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                /*119*/ 0,
2748                                        0,
2749                                        MatMultHermitianTranspose_SeqBAIJ,
2750                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2751                                        0,
2752                                /*124*/ 0,
2753                                        0,
2754                                        MatInvertBlockDiagonal_SeqBAIJ,
2755                                        0,
2756                                        0,
2757                                /*129*/ 0,
2758                                        0,
2759                                        0,
2760                                        0,
2761                                        0,
2762                                /*134*/ 0,
2763                                        0,
2764                                        0,
2765                                        0,
2766                                        0,
2767                                /*139*/ 0,
2768                                        0,
2769                                        0,
2770                                        MatFDColoringSetUp_SeqXAIJ,
2771                                        0,
2772                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2773 };
2774 
2775 #undef __FUNCT__
2776 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2777 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2778 {
2779   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2780   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2781   PetscErrorCode ierr;
2782 
2783   PetscFunctionBegin;
2784   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2785 
2786   /* allocate space for values if not already there */
2787   if (!aij->saved_values) {
2788     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2789     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2790   }
2791 
2792   /* copy values over */
2793   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2799 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2800 {
2801   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2802   PetscErrorCode ierr;
2803   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2804 
2805   PetscFunctionBegin;
2806   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2807   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2808 
2809   /* copy values over */
2810   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2815 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2819 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2820 {
2821   Mat_SeqBAIJ    *b;
2822   PetscErrorCode ierr;
2823   PetscInt       i,mbs,nbs,bs2;
2824   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2825 
2826   PetscFunctionBegin;
2827   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2828   if (nz == MAT_SKIP_ALLOCATION) {
2829     skipallocation = PETSC_TRUE;
2830     nz             = 0;
2831   }
2832 
2833   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2834   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2835   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2836   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2837 
2838   B->preallocated = PETSC_TRUE;
2839 
2840   mbs = B->rmap->n/bs;
2841   nbs = B->cmap->n/bs;
2842   bs2 = bs*bs;
2843 
2844   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2845 
2846   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2847   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2848   if (nnz) {
2849     for (i=0; i<mbs; i++) {
2850       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2851       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2852     }
2853   }
2854 
2855   b    = (Mat_SeqBAIJ*)B->data;
2856   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2857   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2858   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2859 
2860   if (!flg) {
2861     switch (bs) {
2862     case 1:
2863       B->ops->mult    = MatMult_SeqBAIJ_1;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2865       break;
2866     case 2:
2867       B->ops->mult    = MatMult_SeqBAIJ_2;
2868       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2869       break;
2870     case 3:
2871       B->ops->mult    = MatMult_SeqBAIJ_3;
2872       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2873       break;
2874     case 4:
2875       B->ops->mult    = MatMult_SeqBAIJ_4;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2877       break;
2878     case 5:
2879       B->ops->mult    = MatMult_SeqBAIJ_5;
2880       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2881       break;
2882     case 6:
2883       B->ops->mult    = MatMult_SeqBAIJ_6;
2884       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2885       break;
2886     case 7:
2887       B->ops->mult    = MatMult_SeqBAIJ_7;
2888       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2889       break;
2890     case 15:
2891       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2892       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2893       break;
2894     default:
2895       B->ops->mult    = MatMult_SeqBAIJ_N;
2896       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2897       break;
2898     }
2899   }
2900   B->ops->sor = MatSOR_SeqBAIJ;
2901   b->mbs = mbs;
2902   b->nbs = nbs;
2903   if (!skipallocation) {
2904     if (!b->imax) {
2905       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2906       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2907 
2908       b->free_imax_ilen = PETSC_TRUE;
2909     }
2910     /* b->ilen will count nonzeros in each block row so far. */
2911     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2912     if (!nnz) {
2913       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2914       else if (nz < 0) nz = 1;
2915       for (i=0; i<mbs; i++) b->imax[i] = nz;
2916       nz = nz*mbs;
2917     } else {
2918       nz = 0;
2919       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2920     }
2921 
2922     /* allocate the matrix space */
2923     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2924     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2925     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2926     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2927     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2928 
2929     b->singlemalloc = PETSC_TRUE;
2930     b->i[0]         = 0;
2931     for (i=1; i<mbs+1; i++) {
2932       b->i[i] = b->i[i-1] + b->imax[i-1];
2933     }
2934     b->free_a  = PETSC_TRUE;
2935     b->free_ij = PETSC_TRUE;
2936   } else {
2937     b->free_a  = PETSC_FALSE;
2938     b->free_ij = PETSC_FALSE;
2939   }
2940 
2941   b->bs2              = bs2;
2942   b->mbs              = mbs;
2943   b->nz               = 0;
2944   b->maxnz            = nz;
2945   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2946   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2952 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2953 {
2954   PetscInt       i,m,nz,nz_max=0,*nnz;
2955   PetscScalar    *values=0;
2956   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2957   PetscErrorCode ierr;
2958 
2959   PetscFunctionBegin;
2960   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2961   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2962   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2963   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2964   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2965   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2966   m    = B->rmap->n/bs;
2967 
2968   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2969   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2970   for (i=0; i<m; i++) {
2971     nz = ii[i+1]- ii[i];
2972     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2973     nz_max = PetscMax(nz_max, nz);
2974     nnz[i] = nz;
2975   }
2976   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2977   ierr = PetscFree(nnz);CHKERRQ(ierr);
2978 
2979   values = (PetscScalar*)V;
2980   if (!values) {
2981     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2982   }
2983   for (i=0; i<m; i++) {
2984     PetscInt          ncols  = ii[i+1] - ii[i];
2985     const PetscInt    *icols = jj + ii[i];
2986     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2987     if (!roworiented) {
2988       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2989     } else {
2990       PetscInt j;
2991       for (j=0; j<ncols; j++) {
2992         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2993         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2994       }
2995     }
2996   }
2997   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2998   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3000   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 /*MC
3005    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3006    block sparse compressed row format.
3007 
3008    Options Database Keys:
3009 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3010 
3011   Level: beginner
3012 
3013 .seealso: MatCreateSeqBAIJ()
3014 M*/
3015 
3016 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatCreate_SeqBAIJ"
3020 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3021 {
3022   PetscErrorCode ierr;
3023   PetscMPIInt    size;
3024   Mat_SeqBAIJ    *b;
3025 
3026   PetscFunctionBegin;
3027   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3028   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3029 
3030   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3031   B->data = (void*)b;
3032   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3033 
3034   b->row          = 0;
3035   b->col          = 0;
3036   b->icol         = 0;
3037   b->reallocs     = 0;
3038   b->saved_values = 0;
3039 
3040   b->roworiented        = PETSC_TRUE;
3041   b->nonew              = 0;
3042   b->diag               = 0;
3043   B->spptr              = 0;
3044   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3045   b->keepnonzeropattern = PETSC_FALSE;
3046 
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3064 {
3065   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3066   PetscErrorCode ierr;
3067   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3068 
3069   PetscFunctionBegin;
3070   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3071 
3072   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073     c->imax           = a->imax;
3074     c->ilen           = a->ilen;
3075     c->free_imax_ilen = PETSC_FALSE;
3076   } else {
3077     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3078     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3079     for (i=0; i<mbs; i++) {
3080       c->imax[i] = a->imax[i];
3081       c->ilen[i] = a->ilen[i];
3082     }
3083     c->free_imax_ilen = PETSC_TRUE;
3084   }
3085 
3086   /* allocate the matrix space */
3087   if (mallocmatspace) {
3088     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3089       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3090       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3091 
3092       c->i            = a->i;
3093       c->j            = a->j;
3094       c->singlemalloc = PETSC_FALSE;
3095       c->free_a       = PETSC_TRUE;
3096       c->free_ij      = PETSC_FALSE;
3097       c->parent       = A;
3098       C->preallocated = PETSC_TRUE;
3099       C->assembled    = PETSC_TRUE;
3100 
3101       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3102       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3103       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3104     } else {
3105       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3106       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3107 
3108       c->singlemalloc = PETSC_TRUE;
3109       c->free_a       = PETSC_TRUE;
3110       c->free_ij      = PETSC_TRUE;
3111 
3112       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3113       if (mbs > 0) {
3114         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3115         if (cpvalues == MAT_COPY_VALUES) {
3116           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3117         } else {
3118           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3119         }
3120       }
3121       C->preallocated = PETSC_TRUE;
3122       C->assembled    = PETSC_TRUE;
3123     }
3124   }
3125 
3126   c->roworiented = a->roworiented;
3127   c->nonew       = a->nonew;
3128 
3129   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3130   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3131 
3132   c->bs2         = a->bs2;
3133   c->mbs         = a->mbs;
3134   c->nbs         = a->nbs;
3135 
3136   if (a->diag) {
3137     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3138       c->diag      = a->diag;
3139       c->free_diag = PETSC_FALSE;
3140     } else {
3141       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3142       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3143       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3144       c->free_diag = PETSC_TRUE;
3145     }
3146   } else c->diag = 0;
3147 
3148   c->nz         = a->nz;
3149   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3150   c->solve_work = NULL;
3151   c->mult_work  = NULL;
3152   c->sor_workt  = NULL;
3153   c->sor_work   = NULL;
3154 
3155   c->compressedrow.use   = a->compressedrow.use;
3156   c->compressedrow.nrows = a->compressedrow.nrows;
3157   if (a->compressedrow.use) {
3158     i    = a->compressedrow.nrows;
3159     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3160     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3161     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3162     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3163   } else {
3164     c->compressedrow.use    = PETSC_FALSE;
3165     c->compressedrow.i      = NULL;
3166     c->compressedrow.rindex = NULL;
3167   }
3168   C->nonzerostate = A->nonzerostate;
3169 
3170   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3171   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3177 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3178 {
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3183   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3184   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3185   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3186   PetscFunctionReturn(0);
3187 }
3188 
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatLoad_SeqBAIJ"
3191 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3192 {
3193   Mat_SeqBAIJ    *a;
3194   PetscErrorCode ierr;
3195   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3196   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3197   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3198   PetscInt       *masked,nmask,tmp,bs2,ishift;
3199   PetscMPIInt    size;
3200   int            fd;
3201   PetscScalar    *aa;
3202   MPI_Comm       comm;
3203 
3204   PetscFunctionBegin;
3205   /* force binary viewer to load .info file if it has not yet done so */
3206   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3207   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3208   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3209   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3210   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3211   if (bs < 0) bs = 1;
3212   bs2  = bs*bs;
3213 
3214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3215   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3216   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3217   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3218   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3219   M = header[1]; N = header[2]; nz = header[3];
3220 
3221   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3222   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3223 
3224   /*
3225      This code adds extra rows to make sure the number of rows is
3226     divisible by the blocksize
3227   */
3228   mbs        = M/bs;
3229   extra_rows = bs - M + bs*(mbs);
3230   if (extra_rows == bs) extra_rows = 0;
3231   else mbs++;
3232   if (extra_rows) {
3233     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3234   }
3235 
3236   /* Set global sizes if not already set */
3237   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3238     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3239   } else { /* Check if the matrix global sizes are correct */
3240     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3241     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3242       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3243     }
3244     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3245   }
3246 
3247   /* read in row lengths */
3248   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3249   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3250   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3251 
3252   /* read in column indices */
3253   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3254   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3255   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3256 
3257   /* loop over row lengths determining block row lengths */
3258   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3259   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3260   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3261   rowcount = 0;
3262   nzcount  = 0;
3263   for (i=0; i<mbs; i++) {
3264     nmask = 0;
3265     for (j=0; j<bs; j++) {
3266       kmax = rowlengths[rowcount];
3267       for (k=0; k<kmax; k++) {
3268         tmp = jj[nzcount++]/bs;
3269         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3270       }
3271       rowcount++;
3272     }
3273     browlengths[i] += nmask;
3274     /* zero out the mask elements we set */
3275     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3276   }
3277 
3278   /* Do preallocation  */
3279   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3280   a    = (Mat_SeqBAIJ*)newmat->data;
3281 
3282   /* set matrix "i" values */
3283   a->i[0] = 0;
3284   for (i=1; i<= mbs; i++) {
3285     a->i[i]      = a->i[i-1] + browlengths[i-1];
3286     a->ilen[i-1] = browlengths[i-1];
3287   }
3288   a->nz = 0;
3289   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3290 
3291   /* read in nonzero values */
3292   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3293   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3294   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3295 
3296   /* set "a" and "j" values into matrix */
3297   nzcount = 0; jcount = 0;
3298   for (i=0; i<mbs; i++) {
3299     nzcountb = nzcount;
3300     nmask    = 0;
3301     for (j=0; j<bs; j++) {
3302       kmax = rowlengths[i*bs+j];
3303       for (k=0; k<kmax; k++) {
3304         tmp = jj[nzcount++]/bs;
3305         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3306       }
3307     }
3308     /* sort the masked values */
3309     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3310 
3311     /* set "j" values into matrix */
3312     maskcount = 1;
3313     for (j=0; j<nmask; j++) {
3314       a->j[jcount++]  = masked[j];
3315       mask[masked[j]] = maskcount++;
3316     }
3317     /* set "a" values into matrix */
3318     ishift = bs2*a->i[i];
3319     for (j=0; j<bs; j++) {
3320       kmax = rowlengths[i*bs+j];
3321       for (k=0; k<kmax; k++) {
3322         tmp       = jj[nzcountb]/bs;
3323         block     = mask[tmp] - 1;
3324         point     = jj[nzcountb] - bs*tmp;
3325         idx       = ishift + bs2*block + j + bs*point;
3326         a->a[idx] = (MatScalar)aa[nzcountb++];
3327       }
3328     }
3329     /* zero out the mask elements we set */
3330     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3331   }
3332   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3333 
3334   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3335   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3336   ierr = PetscFree(aa);CHKERRQ(ierr);
3337   ierr = PetscFree(jj);CHKERRQ(ierr);
3338   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3339 
3340   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3341   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatCreateSeqBAIJ"
3347 /*@C
3348    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3349    compressed row) format.  For good matrix assembly performance the
3350    user should preallocate the matrix storage by setting the parameter nz
3351    (or the array nnz).  By setting these parameters accurately, performance
3352    during matrix assembly can be increased by more than a factor of 50.
3353 
3354    Collective on MPI_Comm
3355 
3356    Input Parameters:
3357 +  comm - MPI communicator, set to PETSC_COMM_SELF
3358 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3359           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3360 .  m - number of rows
3361 .  n - number of columns
3362 .  nz - number of nonzero blocks  per block row (same for all rows)
3363 -  nnz - array containing the number of nonzero blocks in the various block rows
3364          (possibly different for each block row) or NULL
3365 
3366    Output Parameter:
3367 .  A - the matrix
3368 
3369    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3370    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3371    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3372 
3373    Options Database Keys:
3374 .   -mat_no_unroll - uses code that does not unroll the loops in the
3375                      block calculations (much slower)
3376 .    -mat_block_size - size of the blocks to use
3377 
3378    Level: intermediate
3379 
3380    Notes:
3381    The number of rows and columns must be divisible by blocksize.
3382 
3383    If the nnz parameter is given then the nz parameter is ignored
3384 
3385    A nonzero block is any block that as 1 or more nonzeros in it
3386 
3387    The block AIJ format is fully compatible with standard Fortran 77
3388    storage.  That is, the stored row and column indices can begin at
3389    either one (as in Fortran) or zero.  See the users' manual for details.
3390 
3391    Specify the preallocated storage with either nz or nnz (not both).
3392    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3393    allocation.  See Users-Manual: ch_mat for details.
3394    matrices.
3395 
3396 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3397 @*/
3398 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3399 {
3400   PetscErrorCode ierr;
3401 
3402   PetscFunctionBegin;
3403   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3404   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3405   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3406   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3412 /*@C
3413    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3414    per row in the matrix. For good matrix assembly performance the
3415    user should preallocate the matrix storage by setting the parameter nz
3416    (or the array nnz).  By setting these parameters accurately, performance
3417    during matrix assembly can be increased by more than a factor of 50.
3418 
3419    Collective on MPI_Comm
3420 
3421    Input Parameters:
3422 +  B - the matrix
3423 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3424           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3425 .  nz - number of block nonzeros per block row (same for all rows)
3426 -  nnz - array containing the number of block nonzeros in the various block rows
3427          (possibly different for each block row) or NULL
3428 
3429    Options Database Keys:
3430 .   -mat_no_unroll - uses code that does not unroll the loops in the
3431                      block calculations (much slower)
3432 .   -mat_block_size - size of the blocks to use
3433 
3434    Level: intermediate
3435 
3436    Notes:
3437    If the nnz parameter is given then the nz parameter is ignored
3438 
3439    You can call MatGetInfo() to get information on how effective the preallocation was;
3440    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3441    You can also run with the option -info and look for messages with the string
3442    malloc in them to see if additional memory allocation was needed.
3443 
3444    The block AIJ format is fully compatible with standard Fortran 77
3445    storage.  That is, the stored row and column indices can begin at
3446    either one (as in Fortran) or zero.  See the users' manual for details.
3447 
3448    Specify the preallocated storage with either nz or nnz (not both).
3449    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3450    allocation.  See Users-Manual: ch_mat for details.
3451 
3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3453 @*/
3454 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3455 {
3456   PetscErrorCode ierr;
3457 
3458   PetscFunctionBegin;
3459   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3460   PetscValidType(B,1);
3461   PetscValidLogicalCollectiveInt(B,bs,2);
3462   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 #undef __FUNCT__
3467 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3468 /*@C
3469    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3470    (the default sequential PETSc format).
3471 
3472    Collective on MPI_Comm
3473 
3474    Input Parameters:
3475 +  B - the matrix
3476 .  i - the indices into j for the start of each local row (starts with zero)
3477 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3478 -  v - optional values in the matrix
3479 
3480    Level: developer
3481 
3482    Notes:
3483    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3484    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3485    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3486    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3487    block column and the second index is over columns within a block.
3488 
3489 .keywords: matrix, aij, compressed row, sparse
3490 
3491 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3492 @*/
3493 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3494 {
3495   PetscErrorCode ierr;
3496 
3497   PetscFunctionBegin;
3498   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3499   PetscValidType(B,1);
3500   PetscValidLogicalCollectiveInt(B,bs,2);
3501   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3508 /*@
3509      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3510 
3511      Collective on MPI_Comm
3512 
3513    Input Parameters:
3514 +  comm - must be an MPI communicator of size 1
3515 .  bs - size of block
3516 .  m - number of rows
3517 .  n - number of columns
3518 .  i - row indices
3519 .  j - column indices
3520 -  a - matrix values
3521 
3522    Output Parameter:
3523 .  mat - the matrix
3524 
3525    Level: advanced
3526 
3527    Notes:
3528        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3529     once the matrix is destroyed
3530 
3531        You cannot set new nonzero locations into this matrix, that will generate an error.
3532 
3533        The i and j indices are 0 based
3534 
3535        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3536 
3537       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3538       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3539       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3540       with column-major ordering within blocks.
3541 
3542 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3543 
3544 @*/
3545 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3546 {
3547   PetscErrorCode ierr;
3548   PetscInt       ii;
3549   Mat_SeqBAIJ    *baij;
3550 
3551   PetscFunctionBegin;
3552   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3553   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3554 
3555   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3556   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3557   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3558   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3559   baij = (Mat_SeqBAIJ*)(*mat)->data;
3560   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3561   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3562 
3563   baij->i = i;
3564   baij->j = j;
3565   baij->a = a;
3566 
3567   baij->singlemalloc = PETSC_FALSE;
3568   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3569   baij->free_a       = PETSC_FALSE;
3570   baij->free_ij      = PETSC_FALSE;
3571 
3572   for (ii=0; ii<m; ii++) {
3573     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3574 #if defined(PETSC_USE_DEBUG)
3575     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3576 #endif
3577   }
3578 #if defined(PETSC_USE_DEBUG)
3579   for (ii=0; ii<baij->i[m]; ii++) {
3580     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3581     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3582   }
3583 #endif
3584 
3585   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3586   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3587   PetscFunctionReturn(0);
3588 }
3589 
3590 #undef __FUNCT__
3591 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3593 {
3594   PetscErrorCode ierr;
3595 
3596   PetscFunctionBegin;
3597   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3598   PetscFunctionReturn(0);
3599 }
3600