xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c6228bba3efb3955e5e29a4ef79e8b65616d20d2)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
50         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51       }
52 
53       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
54       diag    += 1;
55       mdiag   += 1;
56     }
57     break;
58   case 2:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 4*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
64       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65       diag    += 4;
66       mdiag   += 4;
67     }
68     break;
69   case 3:
70     for (i=0; i<mbs; i++) {
71       odiag    = v + 9*diag_offset[i];
72       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74       diag[8]  = odiag[8];
75       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77       mdiag[8] = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81       mdiag   += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92       mdiag += 16;
93     }
94     break;
95   case 5:
96     for (i=0; i<mbs; i++) {
97       odiag  = v + 25*diag_offset[i];
98       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
99       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
101       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102       diag  += 25;
103       mdiag += 25;
104     }
105     break;
106   case 6:
107     for (i=0; i<mbs; i++) {
108       odiag  = v + 36*diag_offset[i];
109       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
110       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
112       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113       diag  += 36;
114       mdiag += 36;
115     }
116     break;
117   case 7:
118     for (i=0; i<mbs; i++) {
119       odiag  = v + 49*diag_offset[i];
120       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
121       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
122       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
123       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124       diag  += 49;
125       mdiag += 49;
126     }
127     break;
128   default:
129     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
130     for (i=0; i<mbs; i++) {
131       odiag  = v + bs2*diag_offset[i];
132       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
133       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += bs2;
137       mdiag += bs2;
138     }
139     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
140   }
141   a->idiagvalid = PETSC_TRUE;
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatSOR_SeqBAIJ"
147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148 {
149   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
150   PetscScalar       *x,*work,*w,*workt,*t;
151   const MatScalar   *v,*aa = a->a, *idiag;
152   const PetscScalar *b,*xb;
153   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154   PetscErrorCode    ierr;
155   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
157 
158   PetscFunctionBegin;
159   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160   its = its*lits;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
166 
167   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
168 
169   if (!m) PetscFunctionReturn(0);
170   diag  = a->diag;
171   idiag = a->idiag;
172   k    = PetscMax(A->rmap->n,A->cmap->n);
173   if (!a->mult_work) {
174     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
175   }
176   if (!a->sor_workt) {
177     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
178   }
179   if (!a->sor_work) {
180     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
181   }
182   work = a->mult_work;
183   t    = a->sor_workt;
184   w    = a->sor_work;
185 
186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
188 
189   if (flag & SOR_ZERO_INITIAL_GUESS) {
190     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191       switch (bs) {
192       case 1:
193         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194         t[0] = b[0];
195         i2     = 1;
196         idiag += 1;
197         for (i=1; i<m; i++) {
198           v  = aa + ai[i];
199           vi = aj + ai[i];
200           nz = diag[i] - ai[i];
201           s[0] = b[i2];
202           for (j=0; j<nz; j++) {
203             xw[0] = x[vi[j]];
204             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205           }
206           t[i2] = s[0];
207           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208           x[i2]  = xw[0];
209           idiag += 1;
210           i2    += 1;
211         }
212         break;
213       case 2:
214         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215         t[0] = b[0]; t[1] = b[1];
216         i2     = 2;
217         idiag += 4;
218         for (i=1; i<m; i++) {
219           v  = aa + 4*ai[i];
220           vi = aj + ai[i];
221           nz = diag[i] - ai[i];
222           s[0] = b[i2]; s[1] = b[i2+1];
223           for (j=0; j<nz; j++) {
224             idx = 2*vi[j];
225             it  = 4*j;
226             xw[0] = x[idx]; xw[1] = x[1+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228           }
229           t[i2] = s[0]; t[i2+1] = s[1];
230           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231           x[i2]   = xw[0]; x[i2+1] = xw[1];
232           idiag  += 4;
233           i2     += 2;
234         }
235         break;
236       case 3:
237         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239         i2     = 3;
240         idiag += 9;
241         for (i=1; i<m; i++) {
242           v  = aa + 9*ai[i];
243           vi = aj + ai[i];
244           nz = diag[i] - ai[i];
245           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246           while (nz--) {
247             idx = 3*(*vi++);
248             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250             v  += 9;
251           }
252           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255           idiag  += 9;
256           i2     += 3;
257         }
258         break;
259       case 4:
260         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262         i2     = 4;
263         idiag += 16;
264         for (i=1; i<m; i++) {
265           v  = aa + 16*ai[i];
266           vi = aj + ai[i];
267           nz = diag[i] - ai[i];
268           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269           while (nz--) {
270             idx = 4*(*vi++);
271             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273             v  += 16;
274           }
275           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278           idiag  += 16;
279           i2     += 4;
280         }
281         break;
282       case 5:
283         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285         i2     = 5;
286         idiag += 25;
287         for (i=1; i<m; i++) {
288           v  = aa + 25*ai[i];
289           vi = aj + ai[i];
290           nz = diag[i] - ai[i];
291           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292           while (nz--) {
293             idx = 5*(*vi++);
294             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296             v  += 25;
297           }
298           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301           idiag  += 25;
302           i2     += 5;
303         }
304         break;
305       case 6:
306         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308         i2     = 6;
309         idiag += 36;
310         for (i=1; i<m; i++) {
311           v  = aa + 36*ai[i];
312           vi = aj + ai[i];
313           nz = diag[i] - ai[i];
314           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315           while (nz--) {
316             idx = 6*(*vi++);
317             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
318             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320             v  += 36;
321           }
322           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326           idiag  += 36;
327           i2     += 6;
328         }
329         break;
330       case 7:
331         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334         i2     = 7;
335         idiag += 49;
336         for (i=1; i<m; i++) {
337           v  = aa + 49*ai[i];
338           vi = aj + ai[i];
339           nz = diag[i] - ai[i];
340           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
341           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342           while (nz--) {
343             idx = 7*(*vi++);
344             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
345             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347             v  += 49;
348           }
349           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354           idiag  += 49;
355           i2     += 7;
356         }
357         break;
358       default:
359         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
361         i2     = bs;
362         idiag += bs2;
363         for (i=1; i<m; i++) {
364           v  = aa + bs2*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367 
368           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
369           /* copy all rows of x that are needed into contiguous space */
370           workt = work;
371           for (j=0; j<nz; j++) {
372             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
373             workt += bs;
374           }
375           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
377           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
378 
379           idiag += bs2;
380           i2    += bs;
381         }
382         break;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
386       xb = t;
387     }
388     else xb = b;
389     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390       idiag = a->idiag+bs2*(a->mbs-1);
391       i2 = bs * (m-1);
392       switch (bs) {
393       case 1:
394         s[0]  = xb[i2];
395         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396         x[i2] = xw[0];
397         i2   -= 1;
398         for (i=m-2; i>=0; i--) {
399           v  = aa + (diag[i]+1);
400           vi = aj + diag[i] + 1;
401           nz = ai[i+1] - diag[i] - 1;
402           s[0] = xb[i2];
403           for (j=0; j<nz; j++) {
404             xw[0] = x[vi[j]];
405             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406           }
407           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408           x[i2]  = xw[0];
409           idiag -= 1;
410           i2    -= 1;
411         }
412         break;
413       case 2:
414         s[0]  = xb[i2]; s[1] = xb[i2+1];
415         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416         x[i2] = xw[0]; x[i2+1] = xw[1];
417         i2    -= 2;
418         idiag -= 4;
419         for (i=m-2; i>=0; i--) {
420           v  = aa + 4*(diag[i] + 1);
421           vi = aj + diag[i] + 1;
422           nz = ai[i+1] - diag[i] - 1;
423           s[0] = xb[i2]; s[1] = xb[i2+1];
424           for (j=0; j<nz; j++) {
425             idx = 2*vi[j];
426             it  = 4*j;
427             xw[0] = x[idx]; xw[1] = x[1+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429           }
430           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431           x[i2]   = xw[0]; x[i2+1] = xw[1];
432           idiag  -= 4;
433           i2     -= 2;
434         }
435         break;
436       case 3:
437         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440         i2    -= 3;
441         idiag -= 9;
442         for (i=m-2; i>=0; i--) {
443           v  = aa + 9*(diag[i]+1);
444           vi = aj + diag[i] + 1;
445           nz = ai[i+1] - diag[i] - 1;
446           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447           while (nz--) {
448             idx = 3*(*vi++);
449             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451             v  += 9;
452           }
453           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455           idiag  -= 9;
456           i2     -= 3;
457         }
458         break;
459       case 4:
460         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463         i2    -= 4;
464         idiag -= 16;
465         for (i=m-2; i>=0; i--) {
466           v  = aa + 16*(diag[i]+1);
467           vi = aj + diag[i] + 1;
468           nz = ai[i+1] - diag[i] - 1;
469           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470           while (nz--) {
471             idx = 4*(*vi++);
472             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474             v  += 16;
475           }
476           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478           idiag  -= 16;
479           i2     -= 4;
480         }
481         break;
482       case 5:
483         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486         i2    -= 5;
487         idiag -= 25;
488         for (i=m-2; i>=0; i--) {
489           v  = aa + 25*(diag[i]+1);
490           vi = aj + diag[i] + 1;
491           nz = ai[i+1] - diag[i] - 1;
492           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493           while (nz--) {
494             idx = 5*(*vi++);
495             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497             v  += 25;
498           }
499           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501           idiag  -= 25;
502           i2     -= 5;
503         }
504         break;
505       case 6:
506         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509         i2    -= 6;
510         idiag -= 36;
511         for (i=m-2; i>=0; i--) {
512           v  = aa + 36*(diag[i]+1);
513           vi = aj + diag[i] + 1;
514           nz = ai[i+1] - diag[i] - 1;
515           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516           while (nz--) {
517             idx = 6*(*vi++);
518             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
519             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521             v  += 36;
522           }
523           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525           idiag  -= 36;
526           i2     -= 6;
527         }
528         break;
529       case 7:
530         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535         i2    -= 7;
536         idiag -= 49;
537         for (i=m-2; i>=0; i--) {
538           v  = aa + 49*(diag[i]+1);
539           vi = aj + diag[i] + 1;
540           nz = ai[i+1] - diag[i] - 1;
541           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
542           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543           while (nz--) {
544             idx = 7*(*vi++);
545             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
546             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548             v  += 49;
549           }
550           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553           idiag  -= 49;
554           i2     -= 7;
555         }
556         break;
557       default:
558         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
559         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560         i2    -= bs;
561         idiag -= bs2;
562         for (i=m-2; i>=0; i--) {
563           v  = aa + bs2*(diag[i]+1);
564           vi = aj + diag[i] + 1;
565           nz = ai[i+1] - diag[i] - 1;
566 
567           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
568           /* copy all rows of x that are needed into contiguous space */
569           workt = work;
570           for (j=0; j<nz; j++) {
571             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
572             workt += bs;
573           }
574           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
576 
577           idiag -= bs2;
578           i2    -= bs;
579         }
580         break;
581       }
582       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
583     }
584     its--;
585   }
586   while (its--) {
587     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588       idiag = a->idiag;
589       i2 = 0;
590       switch (bs) {
591       case 1:
592         for (i=0; i<m; i++) {
593           v  = aa + ai[i];
594           vi = aj + ai[i];
595           nz = ai[i+1] - ai[i];
596           s[0] = b[i2];
597           for (j=0; j<nz; j++) {
598             xw[0] = x[vi[j]];
599             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600           }
601           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602           x[i2] += xw[0];
603           idiag += 1;
604           i2    += 1;
605         }
606         break;
607       case 2:
608         for (i=0; i<m; i++) {
609           v  = aa + 4*ai[i];
610           vi = aj + ai[i];
611           nz = ai[i+1] - ai[i];
612           s[0] = b[i2]; s[1] = b[i2+1];
613           for (j=0; j<nz; j++) {
614             idx = 2*vi[j];
615             it  = 4*j;
616             xw[0] = x[idx]; xw[1] = x[1+idx];
617             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618           }
619           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620           x[i2]  += xw[0]; x[i2+1] += xw[1];
621           idiag  += 4;
622           i2     += 2;
623         }
624         break;
625       case 3:
626         for (i=0; i<m; i++) {
627           v  = aa + 9*ai[i];
628           vi = aj + ai[i];
629           nz = ai[i+1] - ai[i];
630           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631           while (nz--) {
632             idx = 3*(*vi++);
633             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635             v  += 9;
636           }
637           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639           idiag  += 9;
640           i2     += 3;
641         }
642         break;
643       case 4:
644         for (i=0; i<m; i++) {
645           v  = aa + 16*ai[i];
646           vi = aj + ai[i];
647           nz = ai[i+1] - ai[i];
648           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649           while (nz--) {
650             idx = 4*(*vi++);
651             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653             v  += 16;
654           }
655           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657           idiag  += 16;
658           i2     += 4;
659         }
660         break;
661       case 5:
662         for (i=0; i<m; i++) {
663           v  = aa + 25*ai[i];
664           vi = aj + ai[i];
665           nz = ai[i+1] - ai[i];
666           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667           while (nz--) {
668             idx = 5*(*vi++);
669             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671             v  += 25;
672           }
673           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675           idiag  += 25;
676           i2     += 5;
677         }
678         break;
679       case 6:
680         for (i=0; i<m; i++) {
681           v  = aa + 36*ai[i];
682           vi = aj + ai[i];
683           nz = ai[i+1] - ai[i];
684           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685           while (nz--) {
686             idx = 6*(*vi++);
687             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
688             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690             v  += 36;
691           }
692           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695           idiag  += 36;
696           i2     += 6;
697         }
698         break;
699       case 7:
700         for (i=0; i<m; i++) {
701           v  = aa + 49*ai[i];
702           vi = aj + ai[i];
703           nz = ai[i+1] - ai[i];
704           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
705           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706           while (nz--) {
707             idx = 7*(*vi++);
708             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
709             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711             v  += 49;
712           }
713           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716           idiag  += 49;
717           i2     += 7;
718         }
719         break;
720       default:
721         for (i=0; i<m; i++) {
722           v  = aa + bs2*ai[i];
723           vi = aj + ai[i];
724           nz = ai[i+1] - ai[i];
725 
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727           /* copy all rows of x that are needed into contiguous space */
728           workt = work;
729           for (j=0; j<nz; j++) {
730             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
731             workt += bs;
732           }
733           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
735 
736           idiag += bs2;
737           i2    += bs;
738         }
739         break;
740       }
741       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
742     }
743     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744       idiag = a->idiag+bs2*(a->mbs-1);
745       i2 = bs * (m-1);
746       switch (bs) {
747       case 1:
748         for (i=m-1; i>=0; i--) {
749           v  = aa + ai[i];
750           vi = aj + ai[i];
751           nz = ai[i+1] - ai[i];
752           s[0] = b[i2];
753           for (j=0; j<nz; j++) {
754             xw[0] = x[vi[j]];
755             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756           }
757           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758           x[i2] += xw[0];
759           idiag -= 1;
760           i2    -= 1;
761         }
762         break;
763       case 2:
764         for (i=m-1; i>=0; i--) {
765           v  = aa + 4*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2]; s[1] = b[i2+1];
769           for (j=0; j<nz; j++) {
770             idx = 2*vi[j];
771             it  = 4*j;
772             xw[0] = x[idx]; xw[1] = x[1+idx];
773             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774           }
775           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776           x[i2]  += xw[0]; x[i2+1] += xw[1];
777           idiag  -= 4;
778           i2     -= 2;
779         }
780         break;
781       case 3:
782         for (i=m-1; i>=0; i--) {
783           v  = aa + 9*ai[i];
784           vi = aj + ai[i];
785           nz = ai[i+1] - ai[i];
786           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787           while (nz--) {
788             idx = 3*(*vi++);
789             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791             v  += 9;
792           }
793           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795           idiag  -= 9;
796           i2     -= 3;
797         }
798         break;
799       case 4:
800         for (i=m-1; i>=0; i--) {
801           v  = aa + 16*ai[i];
802           vi = aj + ai[i];
803           nz = ai[i+1] - ai[i];
804           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805           while (nz--) {
806             idx = 4*(*vi++);
807             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809             v  += 16;
810           }
811           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813           idiag  -= 16;
814           i2     -= 4;
815         }
816         break;
817       case 5:
818         for (i=m-1; i>=0; i--) {
819           v  = aa + 25*ai[i];
820           vi = aj + ai[i];
821           nz = ai[i+1] - ai[i];
822           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823           while (nz--) {
824             idx = 5*(*vi++);
825             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827             v  += 25;
828           }
829           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831           idiag  -= 25;
832           i2     -= 5;
833         }
834         break;
835       case 6:
836         for (i=m-1; i>=0; i--) {
837           v  = aa + 36*ai[i];
838           vi = aj + ai[i];
839           nz = ai[i+1] - ai[i];
840           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841           while (nz--) {
842             idx = 6*(*vi++);
843             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
844             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846             v  += 36;
847           }
848           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851           idiag  -= 36;
852           i2     -= 6;
853         }
854         break;
855       case 7:
856         for (i=m-1; i>=0; i--) {
857           v  = aa + 49*ai[i];
858           vi = aj + ai[i];
859           nz = ai[i+1] - ai[i];
860           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
861           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862           while (nz--) {
863             idx = 7*(*vi++);
864             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
865             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867             v  += 49;
868           }
869           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872           idiag  -= 49;
873           i2     -= 7;
874         }
875         break;
876       default:
877         for (i=m-1; i>=0; i--) {
878           v  = aa + bs2*ai[i];
879           vi = aj + ai[i];
880           nz = ai[i+1] - ai[i];
881 
882           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
883           /* copy all rows of x that are needed into contiguous space */
884           workt = work;
885           for (j=0; j<nz; j++) {
886             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
887             workt += bs;
888           }
889           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
891 
892           idiag -= bs2;
893           i2    -= bs;
894         }
895         break;
896       }
897       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
898     }
899   }
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 
906 /*
907     Special version for direct calls from Fortran (Used in PETSc-fun3d)
908 */
909 #if defined(PETSC_HAVE_FORTRAN_CAPS)
910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912 #define matsetvaluesblocked4_ matsetvaluesblocked4
913 #endif
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "matsetvaluesblocked4_"
917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918 {
919   Mat               A  = *AA;
920   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922   PetscInt          *ai    =a->i,*ailen=a->ilen;
923   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924   const PetscScalar *value = v;
925   MatScalar         *ap,*aa = a->a,*bap;
926 
927   PetscFunctionBegin;
928   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929   stepval = (n-1)*4;
930   for (k=0; k<m; k++) { /* loop over added rows */
931     row  = im[k];
932     rp   = aj + ai[row];
933     ap   = aa + 16*ai[row];
934     nrow = ailen[row];
935     low  = 0;
936     high = nrow;
937     for (l=0; l<n; l++) { /* loop over added columns */
938       col = in[l];
939       if (col <= lastcol)  low = 0;
940       else                high = nrow;
941       lastcol = col;
942       value   = v + k*(stepval+4 + l)*4;
943       while (high-low > 7) {
944         t = (low+high)/2;
945         if (rp[t] > col) high = t;
946         else             low  = t;
947       }
948       for (i=low; i<high; i++) {
949         if (rp[i] > col) break;
950         if (rp[i] == col) {
951           bap = ap +  16*i;
952           for (ii=0; ii<4; ii++,value+=stepval) {
953             for (jj=ii; jj<16; jj+=4) {
954               bap[jj] += *value++;
955             }
956           }
957           goto noinsert2;
958         }
959       }
960       N = nrow++ - 1;
961       high++; /* added new column index thus must search to one higher than before */
962       /* shift up all the later entries in this row */
963       for (ii=N; ii>=i; ii--) {
964         rp[ii+1] = rp[ii];
965         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966       }
967       if (N >= i) {
968         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969       }
970       rp[i] = col;
971       bap   = ap +  16*i;
972       for (ii=0; ii<4; ii++,value+=stepval) {
973         for (jj=ii; jj<16; jj+=4) {
974           bap[jj] = *value++;
975         }
976       }
977       noinsert2:;
978       low = i;
979     }
980     ailen[row] = nrow;
981   }
982   PetscFunctionReturnVoid();
983 }
984 
985 #if defined(PETSC_HAVE_FORTRAN_CAPS)
986 #define matsetvalues4_ MATSETVALUES4
987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988 #define matsetvalues4_ matsetvalues4
989 #endif
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatSetValues4_"
993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994 {
995   Mat         A  = *AA;
996   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998   PetscInt    *ai=a->i,*ailen=a->ilen;
999   PetscInt    *aj=a->j,brow,bcol;
1000   PetscInt    ridx,cidx,lastcol = -1;
1001   MatScalar   *ap,value,*aa=a->a,*bap;
1002 
1003   PetscFunctionBegin;
1004   for (k=0; k<m; k++) { /* loop over added rows */
1005     row  = im[k]; brow = row/4;
1006     rp   = aj + ai[brow];
1007     ap   = aa + 16*ai[brow];
1008     nrow = ailen[brow];
1009     low  = 0;
1010     high = nrow;
1011     for (l=0; l<n; l++) { /* loop over added columns */
1012       col   = in[l]; bcol = col/4;
1013       ridx  = row % 4; cidx = col % 4;
1014       value = v[l + k*n];
1015       if (col <= lastcol)  low = 0;
1016       else                high = nrow;
1017       lastcol = col;
1018       while (high-low > 7) {
1019         t = (low+high)/2;
1020         if (rp[t] > bcol) high = t;
1021         else              low  = t;
1022       }
1023       for (i=low; i<high; i++) {
1024         if (rp[i] > bcol) break;
1025         if (rp[i] == bcol) {
1026           bap   = ap +  16*i + 4*cidx + ridx;
1027           *bap += value;
1028           goto noinsert1;
1029         }
1030       }
1031       N = nrow++ - 1;
1032       high++; /* added new column thus must search to one higher than before */
1033       /* shift up all the later entries in this row */
1034       for (ii=N; ii>=i; ii--) {
1035         rp[ii+1] = rp[ii];
1036         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037       }
1038       if (N>=i) {
1039         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040       }
1041       rp[i]                    = bcol;
1042       ap[16*i + 4*cidx + ridx] = value;
1043 noinsert1:;
1044       low = i;
1045     }
1046     ailen[brow] = nrow;
1047   }
1048   PetscFunctionReturnVoid();
1049 }
1050 
1051 /*
1052      Checks for missing diagonals
1053 */
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086 {
1087   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   PetscInt       i,j,m = a->mbs;
1090 
1091   PetscFunctionBegin;
1092   if (!a->diag) {
1093     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1094     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1095     a->free_diag = PETSC_TRUE;
1096   }
1097   for (i=0; i<m; i++) {
1098     a->diag[i] = a->i[i+1];
1099     for (j=a->i[i]; j<a->i[i+1]; j++) {
1100       if (a->j[j] == i) {
1101         a->diag[i] = j;
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1113 {
1114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1118 
1119   PetscFunctionBegin;
1120   *nn = n;
1121   if (!ia) PetscFunctionReturn(0);
1122   if (symmetric) {
1123     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1124     nz   = tia[n];
1125   } else {
1126     tia = a->i; tja = a->j;
1127   }
1128 
1129   if (!blockcompressed && bs > 1) {
1130     (*nn) *= bs;
1131     /* malloc & create the natural set of indices */
1132     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1133     if (n) {
1134       (*ia)[0] = 0;
1135       for (j=1; j<bs; j++) {
1136         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137       }
1138     }
1139 
1140     for (i=1; i<n; i++) {
1141       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142       for (j=1; j<bs; j++) {
1143         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144       }
1145     }
1146     if (n) {
1147       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148     }
1149 
1150     if (inja) {
1151       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1152       cnt = 0;
1153       for (i=0; i<n; i++) {
1154         for (j=0; j<bs; j++) {
1155           for (k=tia[i]; k<tia[i+1]; k++) {
1156             for (l=0; l<bs; l++) {
1157               (*ja)[cnt++] = bs*tja[k] + l;
1158             }
1159           }
1160         }
1161       }
1162     }
1163 
1164     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165       ierr = PetscFree(tia);CHKERRQ(ierr);
1166       ierr = PetscFree(tja);CHKERRQ(ierr);
1167     }
1168   } else if (oshift == 1) {
1169     if (symmetric) {
1170       nz = tia[A->rmap->n/bs];
1171       /*  add 1 to i and j indices */
1172       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173       *ia = tia;
1174       if (ja) {
1175         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176         *ja = tja;
1177       }
1178     } else {
1179       nz = a->i[A->rmap->n/bs];
1180       /* malloc space and  add 1 to i and j indices */
1181       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1182       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183       if (ja) {
1184         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1185         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186       }
1187     }
1188   } else {
1189     *ia = tia;
1190     if (ja) *ja = tja;
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1198 {
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   if (!ia) PetscFunctionReturn(0);
1203   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204     ierr = PetscFree(*ia);CHKERRQ(ierr);
1205     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213 {
1214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220 #endif
1221   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1222   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1224   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1225   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1226   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1228   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1229   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1230   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1231   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1232   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1233   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1234 
1235   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1236   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1237   ierr = PetscFree(A->data);CHKERRQ(ierr);
1238 
1239   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256 {
1257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   switch (op) {
1262   case MAT_ROW_ORIENTED:
1263     a->roworiented = flg;
1264     break;
1265   case MAT_KEEP_NONZERO_PATTERN:
1266     a->keepnonzeropattern = flg;
1267     break;
1268   case MAT_NEW_NONZERO_LOCATIONS:
1269     a->nonew = (flg ? 0 : 1);
1270     break;
1271   case MAT_NEW_NONZERO_LOCATION_ERR:
1272     a->nonew = (flg ? -1 : 0);
1273     break;
1274   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275     a->nonew = (flg ? -2 : 0);
1276     break;
1277   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278     a->nounused = (flg ? -1 : 0);
1279     break;
1280   case MAT_NEW_DIAGONALS:
1281   case MAT_IGNORE_OFF_PROC_ENTRIES:
1282   case MAT_USE_HASH_TABLE:
1283     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1284     break;
1285   case MAT_SPD:
1286   case MAT_SYMMETRIC:
1287   case MAT_STRUCTURALLY_SYMMETRIC:
1288   case MAT_HERMITIAN:
1289   case MAT_SYMMETRY_ETERNAL:
1290     /* These options are handled directly by MatSetOption() */
1291     break;
1292   default:
1293     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302 {
1303   PetscErrorCode ierr;
1304   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305   MatScalar      *aa_i;
1306   PetscScalar    *v_i;
1307 
1308   PetscFunctionBegin;
1309   bs  = A->rmap->bs;
1310   bs2 = bs*bs;
1311   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1312 
1313   bn  = row/bs;   /* Block number */
1314   bp  = row % bs; /* Block Position */
1315   M   = ai[bn+1] - ai[bn];
1316   *nz = bs*M;
1317 
1318   if (v) {
1319     *v = 0;
1320     if (*nz) {
1321       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1322       for (i=0; i<M; i++) { /* for each block in the block row */
1323         v_i  = *v + i*bs;
1324         aa_i = aa + bs2*(ai[bn] + i);
1325         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326       }
1327     }
1328   }
1329 
1330   if (idx) {
1331     *idx = 0;
1332     if (*nz) {
1333       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1334       for (i=0; i<M; i++) { /* for each block in the block row */
1335         idx_i = *idx + i*bs;
1336         itmp  = bs*aj[ai[bn] + i];
1337         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338       }
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359 {
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1364   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373 {
1374   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1375   Mat            C;
1376   PetscErrorCode ierr;
1377   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378   PetscInt       *rows,*cols,bs2=a->bs2;
1379   MatScalar      *array;
1380 
1381   PetscFunctionBegin;
1382   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1385 
1386     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1388     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1389     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1390     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1391     ierr = PetscFree(col);CHKERRQ(ierr);
1392   } else {
1393     C = *B;
1394   }
1395 
1396   array = a->a;
1397   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1398   for (i=0; i<mbs; i++) {
1399     cols[0] = i*bs;
1400     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401     len = ai[i+1] - ai[i];
1402     for (j=0; j<len; j++) {
1403       rows[0] = (*aj++)*bs;
1404       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1406       array += bs2;
1407     }
1408   }
1409   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413 
1414   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415     *B = C;
1416   } else {
1417     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1425 {
1426   PetscErrorCode ierr;
1427   Mat            Btrans;
1428 
1429   PetscFunctionBegin;
1430   *f   = PETSC_FALSE;
1431   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1432   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1433   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440 {
1441   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1442   PetscErrorCode ierr;
1443   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444   int            fd;
1445   PetscScalar    *aa;
1446   FILE           *file;
1447 
1448   PetscFunctionBegin;
1449   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1450   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1451   col_lens[0] = MAT_FILE_CLASSID;
1452 
1453   col_lens[1] = A->rmap->N;
1454   col_lens[2] = A->cmap->n;
1455   col_lens[3] = a->nz*bs2;
1456 
1457   /* store lengths of each row and write (including header) to file */
1458   count = 0;
1459   for (i=0; i<a->mbs; i++) {
1460     for (j=0; j<bs; j++) {
1461       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1465   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1466 
1467   /* store column indices (zero start index) */
1468   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1469   count = 0;
1470   for (i=0; i<a->mbs; i++) {
1471     for (j=0; j<bs; j++) {
1472       for (k=a->i[i]; k<a->i[i+1]; k++) {
1473         for (l=0; l<bs; l++) {
1474           jj[count++] = bs*a->j[k] + l;
1475         }
1476       }
1477     }
1478   }
1479   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1480   ierr = PetscFree(jj);CHKERRQ(ierr);
1481 
1482   /* store nonzero values */
1483   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1484   count = 0;
1485   for (i=0; i<a->mbs; i++) {
1486     for (j=0; j<bs; j++) {
1487       for (k=a->i[i]; k<a->i[i+1]; k++) {
1488         for (l=0; l<bs; l++) {
1489           aa[count++] = a->a[bs2*k + l*bs + j];
1490         }
1491       }
1492     }
1493   }
1494   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1495   ierr = PetscFree(aa);CHKERRQ(ierr);
1496 
1497   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1498   if (file) {
1499     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507 {
1508   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1509   PetscErrorCode    ierr;
1510   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511   PetscViewerFormat format;
1512 
1513   PetscFunctionBegin;
1514   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1515   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518     const char *matname;
1519     Mat        aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1522     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1523     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1524     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526       PetscFunctionReturn(0);
1527   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     for (i=0; i<a->mbs; i++) {
1530       for (j=0; j<bs; j++) {
1531         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1532         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533           for (l=0; l<bs; l++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             }
1544 #else
1545             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547             }
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   } else {
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1557     for (i=0; i<a->mbs; i++) {
1558       for (j=0; j<bs; j++) {
1559         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1560         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561           for (l=0; l<bs; l++) {
1562 #if defined(PETSC_USE_COMPLEX)
1563             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             }
1572 #else
1573             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1574 #endif
1575           }
1576         }
1577         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1578       }
1579     }
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1581   }
1582   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include <petscdraw.h>
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590 {
1591   Mat               A = (Mat) Aa;
1592   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593   PetscErrorCode    ierr;
1594   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596   MatScalar         *aa;
1597   PetscViewer       viewer;
1598   PetscViewerFormat format;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1602   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1603   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1604 
1605   /* loop over matrix elements drawing boxes */
1606 
1607   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1609     /* Blue for negative, Cyan for zero and  Red for positive */
1610     color = PETSC_DRAW_BLUE;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) >=  0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     color = PETSC_DRAW_CYAN;
1625     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629         aa  = a->a + j*bs2;
1630         for (k=0; k<bs; k++) {
1631           for (l=0; l<bs; l++) {
1632             if (PetscRealPart(*aa++) != 0.) continue;
1633             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1634           }
1635         }
1636       }
1637     }
1638     color = PETSC_DRAW_RED;
1639     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643         aa  = a->a + j*bs2;
1644         for (k=0; k<bs; k++) {
1645           for (l=0; l<bs; l++) {
1646             if (PetscRealPart(*aa++) <= 0.) continue;
1647             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648           }
1649         }
1650       }
1651     }
1652     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1653   } else {
1654     /* use contour shading to indicate magnitude of values */
1655     /* first determine max of all nonzero values */
1656     PetscReal minv = 0.0, maxv = 0.0;
1657     PetscDraw popup;
1658 
1659     for (i=0; i<a->nz*a->bs2; i++) {
1660       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661     }
1662     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1664     if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1665 
1666     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1667     for (i=0,row=0; i<mbs; i++,row+=bs) {
1668       for (j=a->i[i]; j<a->i[i+1]; j++) {
1669         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671         aa  = a->a + j*bs2;
1672         for (k=0; k<bs; k++) {
1673           for (l=0; l<bs; l++) {
1674             MatScalar v = *aa++;
1675             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1677           }
1678         }
1679       }
1680     }
1681     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1688 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1689 {
1690   PetscErrorCode ierr;
1691   PetscReal      xl,yl,xr,yr,w,h;
1692   PetscDraw      draw;
1693   PetscBool      isnull;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1697   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1698 
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1700   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701   xr  += w;    yr += h;  xl = -w;     yl = -h;
1702   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ"
1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1711 {
1712   PetscErrorCode ierr;
1713   PetscBool      iascii,isbinary,isdraw;
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1717   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1718   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1719   if (iascii) {
1720     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1721   } else if (isbinary) {
1722     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1723   } else if (isdraw) {
1724     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1725   } else {
1726     Mat B;
1727     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1728     ierr = MatView(B,viewer);CHKERRQ(ierr);
1729     ierr = MatDestroy(&B);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1738 {
1739   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1740   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1741   PetscInt    *ai = a->i,*ailen = a->ilen;
1742   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1743   MatScalar   *ap,*aa = a->a;
1744 
1745   PetscFunctionBegin;
1746   for (k=0; k<m; k++) { /* loop over rows */
1747     row = im[k]; brow = row/bs;
1748     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1749     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1750     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1751     nrow = ailen[brow];
1752     for (l=0; l<n; l++) { /* loop over columns */
1753       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1754       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1755       col  = in[l];
1756       bcol = col/bs;
1757       cidx = col%bs;
1758       ridx = row%bs;
1759       high = nrow;
1760       low  = 0; /* assume unsorted */
1761       while (high-low > 5) {
1762         t = (low+high)/2;
1763         if (rp[t] > bcol) high = t;
1764         else             low  = t;
1765       }
1766       for (i=low; i<high; i++) {
1767         if (rp[i] > bcol) break;
1768         if (rp[i] == bcol) {
1769           *v++ = ap[bs2*i+bs*cidx+ridx];
1770           goto finished;
1771         }
1772       }
1773       *v++ = 0.0;
1774 finished:;
1775     }
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1783 {
1784   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1785   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787   PetscErrorCode    ierr;
1788   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1789   PetscBool         roworiented=a->roworiented;
1790   const PetscScalar *value     = v;
1791   MatScalar         *ap,*aa = a->a,*bap;
1792 
1793   PetscFunctionBegin;
1794   if (roworiented) {
1795     stepval = (n-1)*bs;
1796   } else {
1797     stepval = (m-1)*bs;
1798   }
1799   for (k=0; k<m; k++) { /* loop over added rows */
1800     row = im[k];
1801     if (row < 0) continue;
1802 #if defined(PETSC_USE_DEBUG)
1803     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1804 #endif
1805     rp   = aj + ai[row];
1806     ap   = aa + bs2*ai[row];
1807     rmax = imax[row];
1808     nrow = ailen[row];
1809     low  = 0;
1810     high = nrow;
1811     for (l=0; l<n; l++) { /* loop over added columns */
1812       if (in[l] < 0) continue;
1813 #if defined(PETSC_USE_DEBUG)
1814       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1815 #endif
1816       col = in[l];
1817       if (roworiented) {
1818         value = v + (k*(stepval+bs) + l)*bs;
1819       } else {
1820         value = v + (l*(stepval+bs) + k)*bs;
1821       }
1822       if (col <= lastcol) low = 0;
1823       else high = nrow;
1824       lastcol = col;
1825       while (high-low > 7) {
1826         t = (low+high)/2;
1827         if (rp[t] > col) high = t;
1828         else             low  = t;
1829       }
1830       for (i=low; i<high; i++) {
1831         if (rp[i] > col) break;
1832         if (rp[i] == col) {
1833           bap = ap +  bs2*i;
1834           if (roworiented) {
1835             if (is == ADD_VALUES) {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] += *value++;
1839                 }
1840               }
1841             } else {
1842               for (ii=0; ii<bs; ii++,value+=stepval) {
1843                 for (jj=ii; jj<bs2; jj+=bs) {
1844                   bap[jj] = *value++;
1845                 }
1846               }
1847             }
1848           } else {
1849             if (is == ADD_VALUES) {
1850               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1851                 for (jj=0; jj<bs; jj++) {
1852                   bap[jj] += value[jj];
1853                 }
1854                 bap += bs;
1855               }
1856             } else {
1857               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1858                 for (jj=0; jj<bs; jj++) {
1859                   bap[jj]  = value[jj];
1860                 }
1861                 bap += bs;
1862               }
1863             }
1864           }
1865           goto noinsert2;
1866         }
1867       }
1868       if (nonew == 1) goto noinsert2;
1869       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1870       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1871       N = nrow++ - 1; high++;
1872       /* shift up all the later entries in this row */
1873       for (ii=N; ii>=i; ii--) {
1874         rp[ii+1] = rp[ii];
1875         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1876       }
1877       if (N >= i) {
1878         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879       }
1880       rp[i] = col;
1881       bap   = ap +  bs2*i;
1882       if (roworiented) {
1883         for (ii=0; ii<bs; ii++,value+=stepval) {
1884           for (jj=ii; jj<bs2; jj+=bs) {
1885             bap[jj] = *value++;
1886           }
1887         }
1888       } else {
1889         for (ii=0; ii<bs; ii++,value+=stepval) {
1890           for (jj=0; jj<bs; jj++) {
1891             *bap++ = *value++;
1892           }
1893         }
1894       }
1895 noinsert2:;
1896       low = i;
1897     }
1898     ailen[row] = nrow;
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1905 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1906 {
1907   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1908   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1909   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1910   PetscErrorCode ierr;
1911   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1912   MatScalar      *aa  = a->a,*ap;
1913   PetscReal      ratio=0.6;
1914 
1915   PetscFunctionBegin;
1916   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1917 
1918   if (m) rmax = ailen[0];
1919   for (i=1; i<mbs; i++) {
1920     /* move each row back by the amount of empty slots (fshift) before it*/
1921     fshift += imax[i-1] - ailen[i-1];
1922     rmax    = PetscMax(rmax,ailen[i]);
1923     if (fshift) {
1924       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1925       N  = ailen[i];
1926       for (j=0; j<N; j++) {
1927         ip[j-fshift] = ip[j];
1928 
1929         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1930       }
1931     }
1932     ai[i] = ai[i-1] + ailen[i-1];
1933   }
1934   if (mbs) {
1935     fshift += imax[mbs-1] - ailen[mbs-1];
1936     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1937   }
1938 
1939   /* reset ilen and imax for each row */
1940   a->nonzerorowcnt = 0;
1941   for (i=0; i<mbs; i++) {
1942     ailen[i] = imax[i] = ai[i+1] - ai[i];
1943     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1944   }
1945   a->nz = ai[mbs];
1946 
1947   /* diagonals may have moved, so kill the diagonal pointers */
1948   a->idiagvalid = PETSC_FALSE;
1949   if (fshift && a->diag) {
1950     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1951     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1952     a->diag = 0;
1953   }
1954   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1955   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1956   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1957   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1958 
1959   A->info.mallocs    += a->reallocs;
1960   a->reallocs         = 0;
1961   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1962   a->rmax             = rmax;
1963 
1964   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 /*
1969    This function returns an array of flags which indicate the locations of contiguous
1970    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1971    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1972    Assume: sizes should be long enough to hold all the values.
1973 */
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1976 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1977 {
1978   PetscInt  i,j,k,row;
1979   PetscBool flg;
1980 
1981   PetscFunctionBegin;
1982   for (i=0,j=0; i<n; j++) {
1983     row = idx[i];
1984     if (row%bs!=0) { /* Not the begining of a block */
1985       sizes[j] = 1;
1986       i++;
1987     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1989       i++;
1990     } else { /* Begining of the block, so check if the complete block exists */
1991       flg = PETSC_TRUE;
1992       for (k=1; k<bs; k++) {
1993         if (row+k != idx[i+k]) { /* break in the block */
1994           flg = PETSC_FALSE;
1995           break;
1996         }
1997       }
1998       if (flg) { /* No break in the bs */
1999         sizes[j] = bs;
2000         i       += bs;
2001       } else {
2002         sizes[j] = 1;
2003         i++;
2004       }
2005     }
2006   }
2007   *bs_max = j;
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2013 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2014 {
2015   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2016   PetscErrorCode    ierr;
2017   PetscInt          i,j,k,count,*rows;
2018   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2019   PetscScalar       zero = 0.0;
2020   MatScalar         *aa;
2021   const PetscScalar *xx;
2022   PetscScalar       *bb;
2023 
2024   PetscFunctionBegin;
2025   /* fix right hand side if needed */
2026   if (x && b) {
2027     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2028     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2029     for (i=0; i<is_n; i++) {
2030       bb[is_idx[i]] = diag*xx[is_idx[i]];
2031     }
2032     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2033     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2034   }
2035 
2036   /* Make a copy of the IS and  sort it */
2037   /* allocate memory for rows,sizes */
2038   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2039 
2040   /* copy IS values to rows, and sort them */
2041   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2042   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2043 
2044   if (baij->keepnonzeropattern) {
2045     for (i=0; i<is_n; i++) sizes[i] = 1;
2046     bs_max          = is_n;
2047   } else {
2048     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2049     A->nonzerostate++;
2050   }
2051 
2052   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2053     row = rows[j];
2054     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2055     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2056     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2057     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2058       if (diag != (PetscScalar)0.0) {
2059         if (baij->ilen[row/bs] > 0) {
2060           baij->ilen[row/bs]       = 1;
2061           baij->j[baij->i[row/bs]] = row/bs;
2062 
2063           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2064         }
2065         /* Now insert all the diagonal values for this bs */
2066         for (k=0; k<bs; k++) {
2067           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2068         }
2069       } else { /* (diag == 0.0) */
2070         baij->ilen[row/bs] = 0;
2071       } /* end (diag == 0.0) */
2072     } else { /* (sizes[i] != bs) */
2073 #if defined(PETSC_USE_DEBUG)
2074       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2075 #endif
2076       for (k=0; k<count; k++) {
2077         aa[0] =  zero;
2078         aa   += bs;
2079       }
2080       if (diag != (PetscScalar)0.0) {
2081         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2082       }
2083     }
2084   }
2085 
2086   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2087   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2093 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2094 {
2095   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2096   PetscErrorCode    ierr;
2097   PetscInt          i,j,k,count;
2098   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2099   PetscScalar       zero = 0.0;
2100   MatScalar         *aa;
2101   const PetscScalar *xx;
2102   PetscScalar       *bb;
2103   PetscBool         *zeroed,vecs = PETSC_FALSE;
2104 
2105   PetscFunctionBegin;
2106   /* fix right hand side if needed */
2107   if (x && b) {
2108     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2109     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2110     vecs = PETSC_TRUE;
2111   }
2112 
2113   /* zero the columns */
2114   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2115   for (i=0; i<is_n; i++) {
2116     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2117     zeroed[is_idx[i]] = PETSC_TRUE;
2118   }
2119   for (i=0; i<A->rmap->N; i++) {
2120     if (!zeroed[i]) {
2121       row = i/bs;
2122       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2123         for (k=0; k<bs; k++) {
2124           col = bs*baij->j[j] + k;
2125           if (zeroed[col]) {
2126             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2127             if (vecs) bb[i] -= aa[0]*xx[col];
2128             aa[0] = 0.0;
2129           }
2130         }
2131       }
2132     } else if (vecs) bb[i] = diag*xx[i];
2133   }
2134   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2135   if (vecs) {
2136     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2137     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2138   }
2139 
2140   /* zero the rows */
2141   for (i=0; i<is_n; i++) {
2142     row   = is_idx[i];
2143     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2144     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2145     for (k=0; k<count; k++) {
2146       aa[0] =  zero;
2147       aa   += bs;
2148     }
2149     if (diag != (PetscScalar)0.0) {
2150       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2151     }
2152   }
2153   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2162   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2163   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2164   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2165   PetscErrorCode ierr;
2166   PetscInt       ridx,cidx,bs2=a->bs2;
2167   PetscBool      roworiented=a->roworiented;
2168   MatScalar      *ap,value,*aa=a->a,*bap;
2169 
2170   PetscFunctionBegin;
2171   for (k=0; k<m; k++) { /* loop over added rows */
2172     row  = im[k];
2173     brow = row/bs;
2174     if (row < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2177 #endif
2178     rp   = aj + ai[brow];
2179     ap   = aa + bs2*ai[brow];
2180     rmax = imax[brow];
2181     nrow = ailen[brow];
2182     low  = 0;
2183     high = nrow;
2184     for (l=0; l<n; l++) { /* loop over added columns */
2185       if (in[l] < 0) continue;
2186 #if defined(PETSC_USE_DEBUG)
2187       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2188 #endif
2189       col  = in[l]; bcol = col/bs;
2190       ridx = row % bs; cidx = col % bs;
2191       if (roworiented) {
2192         value = v[l + k*n];
2193       } else {
2194         value = v[k + l*m];
2195       }
2196       if (col <= lastcol) low = 0; else high = nrow;
2197       lastcol = col;
2198       while (high-low > 7) {
2199         t = (low+high)/2;
2200         if (rp[t] > bcol) high = t;
2201         else              low  = t;
2202       }
2203       for (i=low; i<high; i++) {
2204         if (rp[i] > bcol) break;
2205         if (rp[i] == bcol) {
2206           bap = ap +  bs2*i + bs*cidx + ridx;
2207           if (is == ADD_VALUES) *bap += value;
2208           else                  *bap  = value;
2209           goto noinsert1;
2210         }
2211       }
2212       if (nonew == 1) goto noinsert1;
2213       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2214       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2215       N = nrow++ - 1; high++;
2216       /* shift up all the later entries in this row */
2217       for (ii=N; ii>=i; ii--) {
2218         rp[ii+1] = rp[ii];
2219         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2220       }
2221       if (N>=i) {
2222         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2223       }
2224       rp[i]                      = bcol;
2225       ap[bs2*i + bs*cidx + ridx] = value;
2226       a->nz++;
2227       A->nonzerostate++;
2228 noinsert1:;
2229       low = i;
2230     }
2231     ailen[brow] = nrow;
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2238 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2239 {
2240   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2241   Mat            outA;
2242   PetscErrorCode ierr;
2243   PetscBool      row_identity,col_identity;
2244 
2245   PetscFunctionBegin;
2246   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2247   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2248   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2249   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2250 
2251   outA            = inA;
2252   inA->factortype = MAT_FACTOR_LU;
2253 
2254   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2255 
2256   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2257   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2258   a->row = row;
2259   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2260   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2261   a->col = col;
2262 
2263   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2264   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2265   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2266   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2267 
2268   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2269   if (!a->solve_work) {
2270     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2271     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2272   }
2273   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2279 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2280 {
2281   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2282   PetscInt    i,nz,mbs;
2283 
2284   PetscFunctionBegin;
2285   nz  = baij->maxnz;
2286   mbs = baij->mbs;
2287   for (i=0; i<nz; i++) {
2288     baij->j[i] = indices[i];
2289   }
2290   baij->nz = nz;
2291   for (i=0; i<mbs; i++) {
2292     baij->ilen[i] = baij->imax[i];
2293   }
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2299 /*@
2300     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2301        in the matrix.
2302 
2303   Input Parameters:
2304 +  mat - the SeqBAIJ matrix
2305 -  indices - the column indices
2306 
2307   Level: advanced
2308 
2309   Notes:
2310     This can be called if you have precomputed the nonzero structure of the
2311   matrix and want to provide it to the matrix object to improve the performance
2312   of the MatSetValues() operation.
2313 
2314     You MUST have set the correct numbers of nonzeros per row in the call to
2315   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2316 
2317     MUST be called before any calls to MatSetValues();
2318 
2319 @*/
2320 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2321 {
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2326   PetscValidPointer(indices,2);
2327   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2333 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2334 {
2335   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2336   PetscErrorCode ierr;
2337   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2338   PetscReal      atmp;
2339   PetscScalar    *x,zero = 0.0;
2340   MatScalar      *aa;
2341   PetscInt       ncols,brow,krow,kcol;
2342 
2343   PetscFunctionBegin;
2344   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2345   bs  = A->rmap->bs;
2346   aa  = a->a;
2347   ai  = a->i;
2348   aj  = a->j;
2349   mbs = a->mbs;
2350 
2351   ierr = VecSet(v,zero);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2353   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2354   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2355   for (i=0; i<mbs; i++) {
2356     ncols = ai[1] - ai[0]; ai++;
2357     brow  = bs*i;
2358     for (j=0; j<ncols; j++) {
2359       for (kcol=0; kcol<bs; kcol++) {
2360         for (krow=0; krow<bs; krow++) {
2361           atmp = PetscAbsScalar(*aa);aa++;
2362           row  = brow + krow;   /* row index */
2363           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2364         }
2365       }
2366       aj++;
2367     }
2368   }
2369   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatCopy_SeqBAIJ"
2375 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2376 {
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   /* If the two matrices have the same copy implementation, use fast copy. */
2381   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2382     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2383     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2384     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2385 
2386     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2387     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2388     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2389   } else {
2390     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2391   }
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2397 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2398 {
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2408 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2409 {
2410   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2411 
2412   PetscFunctionBegin;
2413   *array = a->a;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2419 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2420 {
2421   PetscFunctionBegin;
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2427 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2428 {
2429   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2430   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2431   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2432   PetscErrorCode ierr;
2433 
2434   PetscFunctionBegin;
2435   /* Set the number of nonzeros in the new matrix */
2436   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2442 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2443 {
2444   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2445   PetscErrorCode ierr;
2446   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2447   PetscBLASInt   one=1;
2448 
2449   PetscFunctionBegin;
2450   if (str == SAME_NONZERO_PATTERN) {
2451     PetscScalar  alpha = a;
2452     PetscBLASInt bnz;
2453     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2454     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2455     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2456   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2457     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2458   } else {
2459     Mat      B;
2460     PetscInt *nnz;
2461     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2462     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2463     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2464     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2465     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2466     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2467     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2468     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2469     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2470     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2471     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2472     ierr = PetscFree(nnz);CHKERRQ(ierr);
2473   }
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2479 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2480 {
2481   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2482   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2483   MatScalar   *aa = a->a;
2484 
2485   PetscFunctionBegin;
2486   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2492 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2493 {
2494   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2495   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2496   MatScalar   *aa = a->a;
2497 
2498   PetscFunctionBegin;
2499   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2505 /*
2506     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2507 */
2508 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2509 {
2510   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2511   PetscErrorCode ierr;
2512   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2513   PetscInt       nz = a->i[m],row,*jj,mr,col;
2514 
2515   PetscFunctionBegin;
2516   *nn = n;
2517   if (!ia) PetscFunctionReturn(0);
2518   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2519   else {
2520     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2521     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2522     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2523     jj   = a->j;
2524     for (i=0; i<nz; i++) {
2525       collengths[jj[i]]++;
2526     }
2527     cia[0] = oshift;
2528     for (i=0; i<n; i++) {
2529       cia[i+1] = cia[i] + collengths[i];
2530     }
2531     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2532     jj   = a->j;
2533     for (row=0; row<m; row++) {
2534       mr = a->i[row+1] - a->i[row];
2535       for (i=0; i<mr; i++) {
2536         col = *jj++;
2537 
2538         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2539       }
2540     }
2541     ierr = PetscFree(collengths);CHKERRQ(ierr);
2542     *ia  = cia; *ja = cja;
2543   }
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2549 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2550 {
2551   PetscErrorCode ierr;
2552 
2553   PetscFunctionBegin;
2554   if (!ia) PetscFunctionReturn(0);
2555   ierr = PetscFree(*ia);CHKERRQ(ierr);
2556   ierr = PetscFree(*ja);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 /*
2561  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2562  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2563  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2564  */
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2567 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2568 {
2569   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2570   PetscErrorCode ierr;
2571   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2572   PetscInt       nz = a->i[m],row,*jj,mr,col;
2573   PetscInt       *cspidx;
2574 
2575   PetscFunctionBegin;
2576   *nn = n;
2577   if (!ia) PetscFunctionReturn(0);
2578 
2579   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2580   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2581   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2582   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2583   jj   = a->j;
2584   for (i=0; i<nz; i++) {
2585     collengths[jj[i]]++;
2586   }
2587   cia[0] = oshift;
2588   for (i=0; i<n; i++) {
2589     cia[i+1] = cia[i] + collengths[i];
2590   }
2591   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2592   jj   = a->j;
2593   for (row=0; row<m; row++) {
2594     mr = a->i[row+1] - a->i[row];
2595     for (i=0; i<mr; i++) {
2596       col = *jj++;
2597       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2598       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2599     }
2600   }
2601   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2602   *ia    = cia; *ja = cja;
2603   *spidx = cspidx;
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2609 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2610 {
2611   PetscErrorCode ierr;
2612 
2613   PetscFunctionBegin;
2614   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2615   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatShift_SeqBAIJ"
2621 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2622 {
2623   PetscErrorCode ierr;
2624   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2625 
2626   PetscFunctionBegin;
2627   if (!Y->preallocated || !aij->nz) {
2628     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2629   }
2630   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 /* -------------------------------------------------------------------*/
2635 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2636                                        MatGetRow_SeqBAIJ,
2637                                        MatRestoreRow_SeqBAIJ,
2638                                        MatMult_SeqBAIJ_N,
2639                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2640                                        MatMultTranspose_SeqBAIJ,
2641                                        MatMultTransposeAdd_SeqBAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                /* 10*/ 0,
2646                                        MatLUFactor_SeqBAIJ,
2647                                        0,
2648                                        0,
2649                                        MatTranspose_SeqBAIJ,
2650                                /* 15*/ MatGetInfo_SeqBAIJ,
2651                                        MatEqual_SeqBAIJ,
2652                                        MatGetDiagonal_SeqBAIJ,
2653                                        MatDiagonalScale_SeqBAIJ,
2654                                        MatNorm_SeqBAIJ,
2655                                /* 20*/ 0,
2656                                        MatAssemblyEnd_SeqBAIJ,
2657                                        MatSetOption_SeqBAIJ,
2658                                        MatZeroEntries_SeqBAIJ,
2659                                /* 24*/ MatZeroRows_SeqBAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                /* 29*/ MatSetUp_SeqBAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                /* 34*/ MatDuplicate_SeqBAIJ,
2670                                        0,
2671                                        0,
2672                                        MatILUFactor_SeqBAIJ,
2673                                        0,
2674                                /* 39*/ MatAXPY_SeqBAIJ,
2675                                        MatGetSubMatrices_SeqBAIJ,
2676                                        MatIncreaseOverlap_SeqBAIJ,
2677                                        MatGetValues_SeqBAIJ,
2678                                        MatCopy_SeqBAIJ,
2679                                /* 44*/ 0,
2680                                        MatScale_SeqBAIJ,
2681                                        MatShift_SeqBAIJ,
2682                                        0,
2683                                        MatZeroRowsColumns_SeqBAIJ,
2684                                /* 49*/ 0,
2685                                        MatGetRowIJ_SeqBAIJ,
2686                                        MatRestoreRowIJ_SeqBAIJ,
2687                                        MatGetColumnIJ_SeqBAIJ,
2688                                        MatRestoreColumnIJ_SeqBAIJ,
2689                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        MatSetValuesBlocked_SeqBAIJ,
2694                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2695                                        MatDestroy_SeqBAIJ,
2696                                        MatView_SeqBAIJ,
2697                                        0,
2698                                        0,
2699                                /* 64*/ 0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2705                                        0,
2706                                        MatConvert_Basic,
2707                                        0,
2708                                        0,
2709                                /* 74*/ 0,
2710                                        MatFDColoringApply_BAIJ,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                /* 79*/ 0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        MatLoad_SeqBAIJ,
2719                                /* 84*/ 0,
2720                                        0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                /* 89*/ 0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                /* 94*/ 0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                /* 99*/ 0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                /*104*/ 0,
2740                                        MatRealPart_SeqBAIJ,
2741                                        MatImaginaryPart_SeqBAIJ,
2742                                        0,
2743                                        0,
2744                                /*109*/ 0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                        MatMissingDiagonal_SeqBAIJ,
2749                                /*114*/ 0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                /*119*/ 0,
2755                                        0,
2756                                        MatMultHermitianTranspose_SeqBAIJ,
2757                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2758                                        0,
2759                                /*124*/ 0,
2760                                        0,
2761                                        MatInvertBlockDiagonal_SeqBAIJ,
2762                                        0,
2763                                        0,
2764                                /*129*/ 0,
2765                                        0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                /*134*/ 0,
2770                                        0,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                /*139*/ 0,
2775                                        0,
2776                                        0,
2777                                        MatFDColoringSetUp_SeqXAIJ,
2778                                        0,
2779                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2780 };
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2784 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2785 {
2786   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2787   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2792 
2793   /* allocate space for values if not already there */
2794   if (!aij->saved_values) {
2795     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2796     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2797   }
2798 
2799   /* copy values over */
2800   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2806 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2807 {
2808   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2809   PetscErrorCode ierr;
2810   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2811 
2812   PetscFunctionBegin;
2813   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2814   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2815 
2816   /* copy values over */
2817   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2822 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2823 
2824 #undef __FUNCT__
2825 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2826 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2827 {
2828   Mat_SeqBAIJ    *b;
2829   PetscErrorCode ierr;
2830   PetscInt       i,mbs,nbs,bs2;
2831   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2832 
2833   PetscFunctionBegin;
2834   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2835   if (nz == MAT_SKIP_ALLOCATION) {
2836     skipallocation = PETSC_TRUE;
2837     nz             = 0;
2838   }
2839 
2840   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2841   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2842   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2843   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2844 
2845   B->preallocated = PETSC_TRUE;
2846 
2847   mbs = B->rmap->n/bs;
2848   nbs = B->cmap->n/bs;
2849   bs2 = bs*bs;
2850 
2851   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2852 
2853   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2854   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2855   if (nnz) {
2856     for (i=0; i<mbs; i++) {
2857       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2858       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2859     }
2860   }
2861 
2862   b    = (Mat_SeqBAIJ*)B->data;
2863   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2864   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2865   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2866 
2867   if (!flg) {
2868     switch (bs) {
2869     case 1:
2870       B->ops->mult    = MatMult_SeqBAIJ_1;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2872       break;
2873     case 2:
2874       B->ops->mult    = MatMult_SeqBAIJ_2;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2876       break;
2877     case 3:
2878       B->ops->mult    = MatMult_SeqBAIJ_3;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2880       break;
2881     case 4:
2882       B->ops->mult    = MatMult_SeqBAIJ_4;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2884       break;
2885     case 5:
2886       B->ops->mult    = MatMult_SeqBAIJ_5;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2888       break;
2889     case 6:
2890       B->ops->mult    = MatMult_SeqBAIJ_6;
2891       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2892       break;
2893     case 7:
2894       B->ops->mult    = MatMult_SeqBAIJ_7;
2895       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2896       break;
2897     case 15:
2898       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2899       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2900       break;
2901     default:
2902       B->ops->mult    = MatMult_SeqBAIJ_N;
2903       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2904       break;
2905     }
2906   }
2907   B->ops->sor = MatSOR_SeqBAIJ;
2908   b->mbs = mbs;
2909   b->nbs = nbs;
2910   if (!skipallocation) {
2911     if (!b->imax) {
2912       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2913       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2914 
2915       b->free_imax_ilen = PETSC_TRUE;
2916     }
2917     /* b->ilen will count nonzeros in each block row so far. */
2918     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2919     if (!nnz) {
2920       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2921       else if (nz < 0) nz = 1;
2922       for (i=0; i<mbs; i++) b->imax[i] = nz;
2923       nz = nz*mbs;
2924     } else {
2925       nz = 0;
2926       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2927     }
2928 
2929     /* allocate the matrix space */
2930     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2931     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2932     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2933     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2934     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2935 
2936     b->singlemalloc = PETSC_TRUE;
2937     b->i[0]         = 0;
2938     for (i=1; i<mbs+1; i++) {
2939       b->i[i] = b->i[i-1] + b->imax[i-1];
2940     }
2941     b->free_a  = PETSC_TRUE;
2942     b->free_ij = PETSC_TRUE;
2943   } else {
2944     b->free_a  = PETSC_FALSE;
2945     b->free_ij = PETSC_FALSE;
2946   }
2947 
2948   b->bs2              = bs2;
2949   b->mbs              = mbs;
2950   b->nz               = 0;
2951   b->maxnz            = nz;
2952   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2953   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2959 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2960 {
2961   PetscInt       i,m,nz,nz_max=0,*nnz;
2962   PetscScalar    *values=0;
2963   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2964   PetscErrorCode ierr;
2965 
2966   PetscFunctionBegin;
2967   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2968   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2969   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2970   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2971   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2972   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2973   m    = B->rmap->n/bs;
2974 
2975   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2976   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2977   for (i=0; i<m; i++) {
2978     nz = ii[i+1]- ii[i];
2979     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2980     nz_max = PetscMax(nz_max, nz);
2981     nnz[i] = nz;
2982   }
2983   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2984   ierr = PetscFree(nnz);CHKERRQ(ierr);
2985 
2986   values = (PetscScalar*)V;
2987   if (!values) {
2988     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2989   }
2990   for (i=0; i<m; i++) {
2991     PetscInt          ncols  = ii[i+1] - ii[i];
2992     const PetscInt    *icols = jj + ii[i];
2993     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2994     if (!roworiented) {
2995       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2996     } else {
2997       PetscInt j;
2998       for (j=0; j<ncols; j++) {
2999         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3000         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3001       }
3002     }
3003   }
3004   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3005   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 /*MC
3012    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3013    block sparse compressed row format.
3014 
3015    Options Database Keys:
3016 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3017 
3018   Level: beginner
3019 
3020 .seealso: MatCreateSeqBAIJ()
3021 M*/
3022 
3023 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3024 
3025 #undef __FUNCT__
3026 #define __FUNCT__ "MatCreate_SeqBAIJ"
3027 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3028 {
3029   PetscErrorCode ierr;
3030   PetscMPIInt    size;
3031   Mat_SeqBAIJ    *b;
3032 
3033   PetscFunctionBegin;
3034   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3035   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3036 
3037   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3038   B->data = (void*)b;
3039   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3040 
3041   b->row          = 0;
3042   b->col          = 0;
3043   b->icol         = 0;
3044   b->reallocs     = 0;
3045   b->saved_values = 0;
3046 
3047   b->roworiented        = PETSC_TRUE;
3048   b->nonew              = 0;
3049   b->diag               = 0;
3050   B->spptr              = 0;
3051   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3052   b->keepnonzeropattern = PETSC_FALSE;
3053 
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3070 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3071 {
3072   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3073   PetscErrorCode ierr;
3074   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3075 
3076   PetscFunctionBegin;
3077   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3078 
3079   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3080     c->imax           = a->imax;
3081     c->ilen           = a->ilen;
3082     c->free_imax_ilen = PETSC_FALSE;
3083   } else {
3084     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3085     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3086     for (i=0; i<mbs; i++) {
3087       c->imax[i] = a->imax[i];
3088       c->ilen[i] = a->ilen[i];
3089     }
3090     c->free_imax_ilen = PETSC_TRUE;
3091   }
3092 
3093   /* allocate the matrix space */
3094   if (mallocmatspace) {
3095     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3096       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3097       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3098 
3099       c->i            = a->i;
3100       c->j            = a->j;
3101       c->singlemalloc = PETSC_FALSE;
3102       c->free_a       = PETSC_TRUE;
3103       c->free_ij      = PETSC_FALSE;
3104       c->parent       = A;
3105       C->preallocated = PETSC_TRUE;
3106       C->assembled    = PETSC_TRUE;
3107 
3108       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3109       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3110       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3111     } else {
3112       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3113       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3114 
3115       c->singlemalloc = PETSC_TRUE;
3116       c->free_a       = PETSC_TRUE;
3117       c->free_ij      = PETSC_TRUE;
3118 
3119       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3120       if (mbs > 0) {
3121         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3122         if (cpvalues == MAT_COPY_VALUES) {
3123           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3124         } else {
3125           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3126         }
3127       }
3128       C->preallocated = PETSC_TRUE;
3129       C->assembled    = PETSC_TRUE;
3130     }
3131   }
3132 
3133   c->roworiented = a->roworiented;
3134   c->nonew       = a->nonew;
3135 
3136   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3137   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3138 
3139   c->bs2         = a->bs2;
3140   c->mbs         = a->mbs;
3141   c->nbs         = a->nbs;
3142 
3143   if (a->diag) {
3144     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3145       c->diag      = a->diag;
3146       c->free_diag = PETSC_FALSE;
3147     } else {
3148       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3149       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3150       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3151       c->free_diag = PETSC_TRUE;
3152     }
3153   } else c->diag = 0;
3154 
3155   c->nz         = a->nz;
3156   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3157   c->solve_work = NULL;
3158   c->mult_work  = NULL;
3159   c->sor_workt  = NULL;
3160   c->sor_work   = NULL;
3161 
3162   c->compressedrow.use   = a->compressedrow.use;
3163   c->compressedrow.nrows = a->compressedrow.nrows;
3164   if (a->compressedrow.use) {
3165     i    = a->compressedrow.nrows;
3166     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3167     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3168     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3169     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3170   } else {
3171     c->compressedrow.use    = PETSC_FALSE;
3172     c->compressedrow.i      = NULL;
3173     c->compressedrow.rindex = NULL;
3174   }
3175   C->nonzerostate = A->nonzerostate;
3176 
3177   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3178   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3184 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3185 {
3186   PetscErrorCode ierr;
3187 
3188   PetscFunctionBegin;
3189   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3190   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3191   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3192   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 #undef __FUNCT__
3197 #define __FUNCT__ "MatLoad_SeqBAIJ"
3198 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3199 {
3200   Mat_SeqBAIJ    *a;
3201   PetscErrorCode ierr;
3202   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3203   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3204   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3205   PetscInt       *masked,nmask,tmp,bs2,ishift;
3206   PetscMPIInt    size;
3207   int            fd;
3208   PetscScalar    *aa;
3209   MPI_Comm       comm;
3210 
3211   PetscFunctionBegin;
3212   /* force binary viewer to load .info file if it has not yet done so */
3213   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3214   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3215   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3216   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3217   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3218   if (bs < 0) bs = 1;
3219   bs2  = bs*bs;
3220 
3221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3222   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3223   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3224   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3225   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3226   M = header[1]; N = header[2]; nz = header[3];
3227 
3228   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3229   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3230 
3231   /*
3232      This code adds extra rows to make sure the number of rows is
3233     divisible by the blocksize
3234   */
3235   mbs        = M/bs;
3236   extra_rows = bs - M + bs*(mbs);
3237   if (extra_rows == bs) extra_rows = 0;
3238   else mbs++;
3239   if (extra_rows) {
3240     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3241   }
3242 
3243   /* Set global sizes if not already set */
3244   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3245     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3246   } else { /* Check if the matrix global sizes are correct */
3247     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3248     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3249       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3250     }
3251     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3252   }
3253 
3254   /* read in row lengths */
3255   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3256   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3257   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3258 
3259   /* read in column indices */
3260   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3261   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3262   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3263 
3264   /* loop over row lengths determining block row lengths */
3265   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3266   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3267   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3268   rowcount = 0;
3269   nzcount  = 0;
3270   for (i=0; i<mbs; i++) {
3271     nmask = 0;
3272     for (j=0; j<bs; j++) {
3273       kmax = rowlengths[rowcount];
3274       for (k=0; k<kmax; k++) {
3275         tmp = jj[nzcount++]/bs;
3276         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3277       }
3278       rowcount++;
3279     }
3280     browlengths[i] += nmask;
3281     /* zero out the mask elements we set */
3282     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3283   }
3284 
3285   /* Do preallocation  */
3286   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3287   a    = (Mat_SeqBAIJ*)newmat->data;
3288 
3289   /* set matrix "i" values */
3290   a->i[0] = 0;
3291   for (i=1; i<= mbs; i++) {
3292     a->i[i]      = a->i[i-1] + browlengths[i-1];
3293     a->ilen[i-1] = browlengths[i-1];
3294   }
3295   a->nz = 0;
3296   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3297 
3298   /* read in nonzero values */
3299   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3300   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3301   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3302 
3303   /* set "a" and "j" values into matrix */
3304   nzcount = 0; jcount = 0;
3305   for (i=0; i<mbs; i++) {
3306     nzcountb = nzcount;
3307     nmask    = 0;
3308     for (j=0; j<bs; j++) {
3309       kmax = rowlengths[i*bs+j];
3310       for (k=0; k<kmax; k++) {
3311         tmp = jj[nzcount++]/bs;
3312         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3313       }
3314     }
3315     /* sort the masked values */
3316     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3317 
3318     /* set "j" values into matrix */
3319     maskcount = 1;
3320     for (j=0; j<nmask; j++) {
3321       a->j[jcount++]  = masked[j];
3322       mask[masked[j]] = maskcount++;
3323     }
3324     /* set "a" values into matrix */
3325     ishift = bs2*a->i[i];
3326     for (j=0; j<bs; j++) {
3327       kmax = rowlengths[i*bs+j];
3328       for (k=0; k<kmax; k++) {
3329         tmp       = jj[nzcountb]/bs;
3330         block     = mask[tmp] - 1;
3331         point     = jj[nzcountb] - bs*tmp;
3332         idx       = ishift + bs2*block + j + bs*point;
3333         a->a[idx] = (MatScalar)aa[nzcountb++];
3334       }
3335     }
3336     /* zero out the mask elements we set */
3337     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3338   }
3339   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3340 
3341   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3342   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3343   ierr = PetscFree(aa);CHKERRQ(ierr);
3344   ierr = PetscFree(jj);CHKERRQ(ierr);
3345   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3346 
3347   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatCreateSeqBAIJ"
3354 /*@C
3355    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3356    compressed row) format.  For good matrix assembly performance the
3357    user should preallocate the matrix storage by setting the parameter nz
3358    (or the array nnz).  By setting these parameters accurately, performance
3359    during matrix assembly can be increased by more than a factor of 50.
3360 
3361    Collective on MPI_Comm
3362 
3363    Input Parameters:
3364 +  comm - MPI communicator, set to PETSC_COMM_SELF
3365 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3366           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3367 .  m - number of rows
3368 .  n - number of columns
3369 .  nz - number of nonzero blocks  per block row (same for all rows)
3370 -  nnz - array containing the number of nonzero blocks in the various block rows
3371          (possibly different for each block row) or NULL
3372 
3373    Output Parameter:
3374 .  A - the matrix
3375 
3376    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3377    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3378    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3379 
3380    Options Database Keys:
3381 .   -mat_no_unroll - uses code that does not unroll the loops in the
3382                      block calculations (much slower)
3383 .    -mat_block_size - size of the blocks to use
3384 
3385    Level: intermediate
3386 
3387    Notes:
3388    The number of rows and columns must be divisible by blocksize.
3389 
3390    If the nnz parameter is given then the nz parameter is ignored
3391 
3392    A nonzero block is any block that as 1 or more nonzeros in it
3393 
3394    The block AIJ format is fully compatible with standard Fortran 77
3395    storage.  That is, the stored row and column indices can begin at
3396    either one (as in Fortran) or zero.  See the users' manual for details.
3397 
3398    Specify the preallocated storage with either nz or nnz (not both).
3399    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3400    allocation.  See Users-Manual: ch_mat for details.
3401    matrices.
3402 
3403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3404 @*/
3405 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3406 {
3407   PetscErrorCode ierr;
3408 
3409   PetscFunctionBegin;
3410   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3411   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3412   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3413   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3414   PetscFunctionReturn(0);
3415 }
3416 
3417 #undef __FUNCT__
3418 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3419 /*@C
3420    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3421    per row in the matrix. For good matrix assembly performance the
3422    user should preallocate the matrix storage by setting the parameter nz
3423    (or the array nnz).  By setting these parameters accurately, performance
3424    during matrix assembly can be increased by more than a factor of 50.
3425 
3426    Collective on MPI_Comm
3427 
3428    Input Parameters:
3429 +  B - the matrix
3430 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3431           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3432 .  nz - number of block nonzeros per block row (same for all rows)
3433 -  nnz - array containing the number of block nonzeros in the various block rows
3434          (possibly different for each block row) or NULL
3435 
3436    Options Database Keys:
3437 .   -mat_no_unroll - uses code that does not unroll the loops in the
3438                      block calculations (much slower)
3439 .   -mat_block_size - size of the blocks to use
3440 
3441    Level: intermediate
3442 
3443    Notes:
3444    If the nnz parameter is given then the nz parameter is ignored
3445 
3446    You can call MatGetInfo() to get information on how effective the preallocation was;
3447    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3448    You can also run with the option -info and look for messages with the string
3449    malloc in them to see if additional memory allocation was needed.
3450 
3451    The block AIJ format is fully compatible with standard Fortran 77
3452    storage.  That is, the stored row and column indices can begin at
3453    either one (as in Fortran) or zero.  See the users' manual for details.
3454 
3455    Specify the preallocated storage with either nz or nnz (not both).
3456    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3457    allocation.  See Users-Manual: ch_mat for details.
3458 
3459 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3460 @*/
3461 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3462 {
3463   PetscErrorCode ierr;
3464 
3465   PetscFunctionBegin;
3466   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3467   PetscValidType(B,1);
3468   PetscValidLogicalCollectiveInt(B,bs,2);
3469   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3470   PetscFunctionReturn(0);
3471 }
3472 
3473 #undef __FUNCT__
3474 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3475 /*@C
3476    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3477    (the default sequential PETSc format).
3478 
3479    Collective on MPI_Comm
3480 
3481    Input Parameters:
3482 +  B - the matrix
3483 .  i - the indices into j for the start of each local row (starts with zero)
3484 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3485 -  v - optional values in the matrix
3486 
3487    Level: developer
3488 
3489    Notes:
3490    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3491    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3492    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3493    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3494    block column and the second index is over columns within a block.
3495 
3496 .keywords: matrix, aij, compressed row, sparse
3497 
3498 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3499 @*/
3500 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3501 {
3502   PetscErrorCode ierr;
3503 
3504   PetscFunctionBegin;
3505   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3506   PetscValidType(B,1);
3507   PetscValidLogicalCollectiveInt(B,bs,2);
3508   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3509   PetscFunctionReturn(0);
3510 }
3511 
3512 
3513 #undef __FUNCT__
3514 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3515 /*@
3516      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3517 
3518      Collective on MPI_Comm
3519 
3520    Input Parameters:
3521 +  comm - must be an MPI communicator of size 1
3522 .  bs - size of block
3523 .  m - number of rows
3524 .  n - number of columns
3525 .  i - row indices
3526 .  j - column indices
3527 -  a - matrix values
3528 
3529    Output Parameter:
3530 .  mat - the matrix
3531 
3532    Level: advanced
3533 
3534    Notes:
3535        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3536     once the matrix is destroyed
3537 
3538        You cannot set new nonzero locations into this matrix, that will generate an error.
3539 
3540        The i and j indices are 0 based
3541 
3542        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3543 
3544       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3545       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3546       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3547       with column-major ordering within blocks.
3548 
3549 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3550 
3551 @*/
3552 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3553 {
3554   PetscErrorCode ierr;
3555   PetscInt       ii;
3556   Mat_SeqBAIJ    *baij;
3557 
3558   PetscFunctionBegin;
3559   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3560   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3561 
3562   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3563   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3564   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3565   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3566   baij = (Mat_SeqBAIJ*)(*mat)->data;
3567   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3568   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3569 
3570   baij->i = i;
3571   baij->j = j;
3572   baij->a = a;
3573 
3574   baij->singlemalloc = PETSC_FALSE;
3575   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3576   baij->free_a       = PETSC_FALSE;
3577   baij->free_ij      = PETSC_FALSE;
3578 
3579   for (ii=0; ii<m; ii++) {
3580     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3581 #if defined(PETSC_USE_DEBUG)
3582     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3583 #endif
3584   }
3585 #if defined(PETSC_USE_DEBUG)
3586   for (ii=0; ii<baij->i[m]; ii++) {
3587     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3588     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3589   }
3590 #endif
3591 
3592   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3593   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3594   PetscFunctionReturn(0);
3595 }
3596 
3597 #undef __FUNCT__
3598 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3600 {
3601   PetscErrorCode ierr;
3602 
3603   PetscFunctionBegin;
3604   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3605   PetscFunctionReturn(0);
3606 }
3607