xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e48d15ef255607eaac65c71635979f78eabeae7c)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #define PetscKernel_v_gets_A_times_w_1_exp(v,A,w,exp) \
128 do {                                                  \
129   v[0] exp A[0]*w[0];                                 \
130 } while (0)
131 
132 #define PetscKernel_v_gets_A_times_w_2_exp(v,A,w,exp) \
133 do {                                                  \
134   v[0] exp A[0]*w[0] + A[2]*w[1];                      \
135   v[1] exp A[1]*w[0] + A[3]*w[1];                      \
136 } while (0)
137 
138 #define PetscKernel_v_gets_A_times_w_3_exp(v,A,w,exp) \
139 do {                                                  \
140   v[0] exp A[0]*w[0] + A[3]*w[1] + A[6]*w[2];         \
141   v[1] exp A[1]*w[0] + A[4]*w[1] + A[7]*w[2];         \
142   v[2] exp A[2]*w[0] + A[5]*w[1] + A[8]*w[2];         \
143 } while (0)
144 
145 #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp)       \
146 do {                                                        \
147   v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \
148   v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \
149   v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \
150   v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \
151 } while (0)
152 
153 #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp)       \
154 do {                                                        \
155   v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \
156   v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \
157   v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \
158   v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \
159 } while (0)
160 
161 #define PetscKernel_v_gets_A_times_w_5_exp(v,A,w,exp)                    \
162 do {                                                                     \
163   v[0] exp A[0]*w[0] + A[5]*w[1] + A[10]*w[2] + A[15]*w[3] + A[20]*w[4]; \
164   v[1] exp A[1]*w[0] + A[6]*w[1] + A[11]*w[2] + A[16]*w[3] + A[21]*w[4]; \
165   v[2] exp A[2]*w[0] + A[7]*w[1] + A[12]*w[2] + A[17]*w[3] + A[22]*w[4]; \
166   v[3] exp A[3]*w[0] + A[8]*w[1] + A[13]*w[2] + A[18]*w[3] + A[23]*w[4]; \
167   v[4] exp A[4]*w[0] + A[9]*w[1] + A[14]*w[2] + A[19]*w[3] + A[24]*w[4]; \
168 } while (0)
169 
170 #define PetscKernel_v_gets_A_times_w_6_exp(v,A,w,exp)                                  \
171 do {                                                                                   \
172   v[0] exp A[0]*w[0] + A[6] *w[1] + A[12]*w[2] + A[18]*w[3] + A[24]*w[4] + A[30]*w[5]; \
173   v[1] exp A[1]*w[0] + A[7] *w[1] + A[13]*w[2] + A[19]*w[3] + A[25]*w[4] + A[31]*w[5]; \
174   v[2] exp A[2]*w[0] + A[8] *w[1] + A[14]*w[2] + A[20]*w[3] + A[26]*w[4] + A[32]*w[5]; \
175   v[3] exp A[3]*w[0] + A[9] *w[1] + A[15]*w[2] + A[21]*w[3] + A[27]*w[4] + A[33]*w[5]; \
176   v[4] exp A[4]*w[0] + A[10]*w[1] + A[16]*w[2] + A[22]*w[3] + A[28]*w[4] + A[34]*w[5]; \
177   v[5] exp A[5]*w[0] + A[11]*w[1] + A[17]*w[2] + A[23]*w[3] + A[29]*w[4] + A[35]*w[5]; \
178 } while (0)
179 
180 #define PetscKernel_v_gets_A_times_w_7_exp(v,A,w,exp)                                               \
181 do {                                                                                                \
182   v[0] exp A[0]*w[0] + A[7] *w[1] + A[14]*w[2] + A[21]*w[3] + A[28]*w[4] + A[35]*w[5] + A[42]*w[6]; \
183   v[1] exp A[1]*w[0] + A[8] *w[1] + A[15]*w[2] + A[22]*w[3] + A[29]*w[4] + A[36]*w[5] + A[43]*w[6]; \
184   v[2] exp A[2]*w[0] + A[9] *w[1] + A[16]*w[2] + A[23]*w[3] + A[30]*w[4] + A[37]*w[5] + A[44]*w[6]; \
185   v[3] exp A[3]*w[0] + A[10]*w[1] + A[17]*w[2] + A[24]*w[3] + A[31]*w[4] + A[38]*w[5] + A[45]*w[6]; \
186   v[4] exp A[4]*w[0] + A[11]*w[1] + A[18]*w[2] + A[25]*w[3] + A[32]*w[4] + A[39]*w[5] + A[46]*w[6]; \
187   v[5] exp A[5]*w[0] + A[12]*w[1] + A[19]*w[2] + A[26]*w[3] + A[33]*w[4] + A[40]*w[5] + A[47]*w[6]; \
188   v[6] exp A[6]*w[0] + A[13]*w[1] + A[20]*w[2] + A[27]*w[3] + A[34]*w[4] + A[41]*w[5] + A[48]*w[6]; \
189 } while (0)
190 
191 #define PetscKernel_v_gets_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,=)
192 #define PetscKernel_v_gets_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,=)
193 #define PetscKernel_v_gets_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,=)
194 #define PetscKernel_v_gets_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,=)
195 #define PetscKernel_v_gets_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,=)
196 #define PetscKernel_v_gets_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,=)
197 #define PetscKernel_v_gets_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,=)
198 #define PetscKernel_v_gets_v_plus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,+=)
199 #define PetscKernel_v_gets_v_plus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,+=)
200 #define PetscKernel_v_gets_v_plus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,+=)
201 #define PetscKernel_v_gets_v_plus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,+=)
202 #define PetscKernel_v_gets_v_plus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,+=)
203 #define PetscKernel_v_gets_v_plus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,+=)
204 #define PetscKernel_v_gets_v_plus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,+=)
205 #define PetscKernel_v_gets_v_minus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,-=)
206 #define PetscKernel_v_gets_v_minus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,-=)
207 #define PetscKernel_v_gets_v_minus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,-=)
208 #define PetscKernel_v_gets_v_minus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,-=)
209 #define PetscKernel_v_gets_v_minus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,-=)
210 #define PetscKernel_v_gets_v_minus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,-=)
211 #define PetscKernel_v_gets_v_minus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,-=)
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatSOR_SeqBAIJ"
215 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
216 {
217   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
218   PetscScalar       *x,*work,*w,*workt,*t;
219   const MatScalar   *v,*aa = a->a, *idiag;
220   const PetscScalar *b,*xb;
221   PetscScalar       s[7], xw[7];
222   PetscErrorCode    ierr;
223   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
224   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   its = its*lits;
228   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
229   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233 
234   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
235 
236   if (!m) PetscFunctionReturn(0);
237   diag  = a->diag;
238   idiag = a->idiag;
239   k    = PetscMax(A->rmap->n,A->cmap->n);
240   if (!a->mult_work) {
241     ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
242   }
243   work = a->mult_work;
244   t = work + k+1;
245   if (!a->sor_work) {
246     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
247   }
248   w = a->sor_work;
249 
250   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
252 
253   if (flag & SOR_ZERO_INITIAL_GUESS) {
254     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
255       switch (bs) {
256       case 1:
257         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
258         t[0] = b[0];
259         i2     = 1;
260         idiag += 1;
261         for (i=1; i<m; i++) {
262           v  = aa + ai[i];
263           vi = aj + ai[i];
264           nz = diag[i] - ai[i];
265           s[0] = b[i2];
266           for (j=0; j<nz; j++) {
267             xw[0] = x[vi[j]];
268             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
269           }
270           t[i2] = s[0];
271           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
272           x[i2]  = xw[0];
273           idiag += 1;
274           i2    += 1;
275         }
276         break;
277       case 2:
278         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
279         t[0] = b[0]; t[1] = b[1];
280         i2     = 2;
281         idiag += 4;
282         for (i=1; i<m; i++) {
283           v  = aa + 4*ai[i];
284           vi = aj + ai[i];
285           nz = diag[i] - ai[i];
286           s[0] = b[i2]; s[1] = b[i2+1];
287           for (j=0; j<nz; j++) {
288             idx = 2*vi[j];
289             it  = 4*j;
290             xw[0] = x[idx]; xw[1] = x[1+idx];
291             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
292           }
293           t[i2] = s[0]; t[i2+1] = s[1];
294           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
295           x[i2]   = xw[0]; x[i2+1] = xw[1];
296           idiag  += 4;
297           i2     += 2;
298         }
299         break;
300       case 3:
301         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
302         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
303         i2     = 3;
304         idiag += 9;
305         for (i=1; i<m; i++) {
306           v  = aa + 9*ai[i];
307           vi = aj + ai[i];
308           nz = diag[i] - ai[i];
309           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
310           while (nz--) {
311             idx = 3*(*vi++);
312             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
313             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
314             v  += 9;
315           }
316           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
317           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
318           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
319           idiag  += 9;
320           i2     += 3;
321         }
322         break;
323       case 4:
324         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
325         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
326         i2     = 4;
327         idiag += 16;
328         for (i=1; i<m; i++) {
329           v  = aa + 16*ai[i];
330           vi = aj + ai[i];
331           nz = diag[i] - ai[i];
332           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
333           while (nz--) {
334             idx = 4*(*vi++);
335             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
336             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
337             v  += 16;
338           }
339           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
340           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
341           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
342           idiag  += 16;
343           i2     += 4;
344         }
345         break;
346       case 5:
347         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
348         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
349         i2     = 5;
350         idiag += 25;
351         for (i=1; i<m; i++) {
352           v  = aa + 25*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
356           while (nz--) {
357             idx = 5*(*vi++);
358             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
359             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
360             v  += 25;
361           }
362           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
363           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
364           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
365           idiag  += 25;
366           i2     += 5;
367         }
368         break;
369       case 6:
370         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
371         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
372         i2     = 6;
373         idiag += 36;
374         for (i=1; i<m; i++) {
375           v  = aa + 36*ai[i];
376           vi = aj + ai[i];
377           nz = diag[i] - ai[i];
378           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
379           while (nz--) {
380             idx = 6*(*vi++);
381             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
382             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
383             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
384             v  += 36;
385           }
386           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
387           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
388           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
389           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
390           idiag  += 36;
391           i2     += 6;
392         }
393         break;
394       case 7:
395         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
396         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
397         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
398         i2     = 7;
399         idiag += 49;
400         for (i=1; i<m; i++) {
401           v  = aa + 49*ai[i];
402           vi = aj + ai[i];
403           nz = diag[i] - ai[i];
404           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
405           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
406           while (nz--) {
407             idx = 7*(*vi++);
408             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
409             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
410             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
411             v  += 49;
412           }
413           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
414           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
415           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
416           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
417           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
418           idiag  += 49;
419           i2     += 7;
420         }
421         break;
422       default:
423         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
424         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
425         i2     = bs;
426         idiag += bs2;
427         for (i=1; i<m; i++) {
428           v  = aa + bs2*ai[i];
429           vi = aj + ai[i];
430           nz = diag[i] - ai[i];
431 
432           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
433           /* copy all rows of x that are needed into contiguous space */
434           workt = work;
435           for (j=0; j<nz; j++) {
436             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
437             workt += bs;
438           }
439           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
440           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
441           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
442 
443           idiag += bs2;
444           i2    += bs;
445         }
446         break;
447       }
448       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
449       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
450       xb = t;
451     }
452     else xb = b;
453     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
454       idiag = a->idiag+bs2*(a->mbs-1);
455       i2 = bs * (m-1);
456       switch (bs) {
457       case 1:
458         s[0]  = xb[i2];
459         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
460         x[i2] = xw[0];
461         i2   -= 1;
462         for (i=m-2; i>=0; i--) {
463           v  = aa + (diag[i]+1);
464           vi = aj + diag[i] + 1;
465           nz = ai[i+1] - diag[i] - 1;
466           s[0] = xb[i2];
467           for (j=0; j<nz; j++) {
468             xw[0] = x[vi[j]];
469             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
470           }
471           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
472           x[i2]  = xw[0];
473           idiag -= 1;
474           i2    -= 1;
475         }
476         break;
477       case 2:
478         s[0]  = xb[i2]; s[1] = xb[i2+1];
479         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
480         x[i2] = xw[0]; x[i2+1] = xw[1];
481         i2    -= 2;
482         idiag -= 4;
483         for (i=m-2; i>=0; i--) {
484           v  = aa + 4*(diag[i] + 1);
485           vi = aj + diag[i] + 1;
486           nz = ai[i+1] - diag[i] - 1;
487           s[0] = xb[i2]; s[1] = xb[i2+1];
488           for (j=0; j<nz; j++) {
489             idx = 2*vi[j];
490             it  = 4*j;
491             xw[0] = x[idx]; xw[1] = x[1+idx];
492             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
493           }
494           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
495           x[i2]   = xw[0]; x[i2+1] = xw[1];
496           idiag  -= 4;
497           i2     -= 2;
498         }
499         break;
500       case 3:
501         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
502         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
503         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
504         i2    -= 3;
505         idiag -= 9;
506         for (i=m-2; i>=0; i--) {
507           v  = aa + 9*(diag[i]+1);
508           vi = aj + diag[i] + 1;
509           nz = ai[i+1] - diag[i] - 1;
510           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
511           while (nz--) {
512             idx = 3*(*vi++);
513             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
514             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
515             v  += 9;
516           }
517           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
518           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
519           idiag  -= 9;
520           i2     -= 3;
521         }
522         break;
523       case 4:
524         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
525         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
526         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
527         i2    -= 4;
528         idiag -= 16;
529         for (i=m-2; i>=0; i--) {
530           v  = aa + 16*(diag[i]+1);
531           vi = aj + diag[i] + 1;
532           nz = ai[i+1] - diag[i] - 1;
533           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
534           while (nz--) {
535             idx = 4*(*vi++);
536             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
537             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
538             v  += 16;
539           }
540           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
541           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
542           idiag  -= 16;
543           i2     -= 4;
544         }
545         break;
546       case 5:
547         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
548         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
549         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
550         i2    -= 5;
551         idiag -= 25;
552         for (i=m-2; i>=0; i--) {
553           v  = aa + 25*(diag[i]+1);
554           vi = aj + diag[i] + 1;
555           nz = ai[i+1] - diag[i] - 1;
556           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
557           while (nz--) {
558             idx = 5*(*vi++);
559             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
560             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
561             v  += 25;
562           }
563           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
564           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
565           idiag  -= 25;
566           i2     -= 5;
567         }
568         break;
569       case 6:
570         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
571         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
572         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
573         i2    -= 6;
574         idiag -= 36;
575         for (i=m-2; i>=0; i--) {
576           v  = aa + 36*(diag[i]+1);
577           vi = aj + diag[i] + 1;
578           nz = ai[i+1] - diag[i] - 1;
579           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
580           while (nz--) {
581             idx = 6*(*vi++);
582             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
583             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
584             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
585             v  += 36;
586           }
587           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
588           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
589           idiag  -= 36;
590           i2     -= 6;
591         }
592         break;
593       case 7:
594         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
597         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
598         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
599         i2    -= 7;
600         idiag -= 49;
601         for (i=m-2; i>=0; i--) {
602           v  = aa + 49*(diag[i]+1);
603           vi = aj + diag[i] + 1;
604           nz = ai[i+1] - diag[i] - 1;
605           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
606           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
607           while (nz--) {
608             idx = 7*(*vi++);
609             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
610             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
611             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
612             v  += 49;
613           }
614           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
615           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
616           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
617           idiag  -= 49;
618           i2     -= 7;
619         }
620         break;
621       default:
622         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
623         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
624         i2    -= bs;
625         idiag -= bs2;
626         for (i=m-2; i>=0; i--) {
627           v  = aa + bs2*(diag[i]+1);
628           vi = aj + diag[i] + 1;
629           nz = ai[i+1] - diag[i] - 1;
630 
631           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
632           /* copy all rows of x that are needed into contiguous space */
633           workt = work;
634           for (j=0; j<nz; j++) {
635             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
636             workt += bs;
637           }
638           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
639           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
640 
641           idiag -= bs2;
642           i2    -= bs;
643         }
644         break;
645       }
646       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
647     }
648     its--;
649   }
650   while (its--) {
651     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
652       idiag = a->idiag;
653       i2 = 0;
654       switch (bs) {
655       case 1:
656         for (i=0; i<m; i++) {
657           v  = aa + ai[i];
658           vi = aj + ai[i];
659           nz = ai[i+1] - ai[i];
660           s[0] = b[i2];
661           for (j=0; j<nz; j++) {
662             xw[0] = x[vi[j]];
663             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
664           }
665           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
666           x[i2] += xw[0];
667           idiag += 1;
668           i2    += 1;
669         }
670         break;
671       case 2:
672         for (i=0; i<m; i++) {
673           v  = aa + 4*ai[i];
674           vi = aj + ai[i];
675           nz = ai[i+1] - ai[i];
676           s[0] = b[i2]; s[1] = b[i2+1];
677           for (j=0; j<nz; j++) {
678             idx = 2*vi[j];
679             it  = 4*j;
680             xw[0] = x[idx]; xw[1] = x[1+idx];
681             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
682           }
683           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
684           x[i2]  += xw[0]; x[i2+1] += xw[1];
685           idiag  += 4;
686           i2     += 2;
687         }
688         break;
689       case 3:
690         for (i=0; i<m; i++) {
691           v  = aa + 9*ai[i];
692           vi = aj + ai[i];
693           nz = ai[i+1] - ai[i];
694           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
695           while (nz--) {
696             idx = 3*(*vi++);
697             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
699             v  += 9;
700           }
701           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
702           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           idiag  += 9;
704           i2     += 3;
705         }
706         break;
707       case 4:
708         for (i=0; i<m; i++) {
709           v  = aa + 16*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
713           while (nz--) {
714             idx = 4*(*vi++);
715             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
716             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
717             v  += 16;
718           }
719           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
720           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
721           idiag  += 16;
722           i2     += 4;
723         }
724         break;
725       case 5:
726         for (i=0; i<m; i++) {
727           v  = aa + 25*ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
731           while (nz--) {
732             idx = 5*(*vi++);
733             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
734             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
735             v  += 25;
736           }
737           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
738           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
739           idiag  += 25;
740           i2     += 5;
741         }
742         break;
743       case 6:
744         for (i=0; i<m; i++) {
745           v  = aa + 36*ai[i];
746           vi = aj + ai[i];
747           nz = ai[i+1] - ai[i];
748           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
749           while (nz--) {
750             idx = 6*(*vi++);
751             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
752             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
753             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
754             v  += 36;
755           }
756           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
757           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
758           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
759           idiag  += 36;
760           i2     += 6;
761         }
762         break;
763       case 7:
764         for (i=0; i<m; i++) {
765           v  = aa + 49*ai[i];
766           vi = aj + ai[i];
767           nz = ai[i+1] - ai[i];
768           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
769           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
770           while (nz--) {
771             idx = 7*(*vi++);
772             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
773             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
774             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
775             v  += 49;
776           }
777           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
778           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
779           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
780           idiag  += 49;
781           i2     += 7;
782         }
783         break;
784       default:
785         for (i=0; i<m; i++) {
786           v  = aa + bs2*ai[i];
787           vi = aj + ai[i];
788           nz = ai[i+1] - ai[i];
789 
790           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
791           /* copy all rows of x that are needed into contiguous space */
792           workt = work;
793           for (j=0; j<nz; j++) {
794             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
795             workt += bs;
796           }
797           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
798           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
799 
800           idiag += bs2;
801           i2    += bs;
802         }
803         break;
804       }
805       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
806     }
807     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
808       idiag = a->idiag+bs2*(a->mbs-1);
809       i2 = bs * (m-1);
810       switch (bs) {
811       case 1:
812         for (i=m-1; i>=0; i--) {
813           v  = aa + ai[i];
814           vi = aj + ai[i];
815           nz = ai[i+1] - ai[i];
816           s[0] = b[i2];
817           for (j=0; j<nz; j++) {
818             xw[0] = x[vi[j]];
819             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
820           }
821           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
822           x[i2] += xw[0];
823           idiag -= 1;
824           i2    -= 1;
825         }
826         break;
827       case 2:
828         for (i=m-1; i>=0; i--) {
829           v  = aa + 4*ai[i];
830           vi = aj + ai[i];
831           nz = ai[i+1] - ai[i];
832           s[0] = b[i2]; s[1] = b[i2+1];
833           for (j=0; j<nz; j++) {
834             idx = 2*vi[j];
835             it  = 4*j;
836             xw[0] = x[idx]; xw[1] = x[1+idx];
837             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
838           }
839           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
840           x[i2]  += xw[0]; x[i2+1] += xw[1];
841           idiag  -= 4;
842           i2     -= 2;
843         }
844         break;
845       case 3:
846         for (i=m-1; i>=0; i--) {
847           v  = aa + 9*ai[i];
848           vi = aj + ai[i];
849           nz = ai[i+1] - ai[i];
850           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
851           while (nz--) {
852             idx = 3*(*vi++);
853             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
855             v  += 9;
856           }
857           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
858           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           idiag  -= 9;
860           i2     -= 3;
861         }
862         break;
863       case 4:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + 16*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
869           while (nz--) {
870             idx = 4*(*vi++);
871             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
872             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
873             v  += 16;
874           }
875           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
876           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
877           idiag  -= 16;
878           i2     -= 4;
879         }
880         break;
881       case 5:
882         for (i=m-1; i>=0; i--) {
883           v  = aa + 25*ai[i];
884           vi = aj + ai[i];
885           nz = ai[i+1] - ai[i];
886           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
887           while (nz--) {
888             idx = 5*(*vi++);
889             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
890             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
891             v  += 25;
892           }
893           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
894           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
895           idiag  -= 25;
896           i2     -= 5;
897         }
898         break;
899       case 6:
900         for (i=m-1; i>=0; i--) {
901           v  = aa + 36*ai[i];
902           vi = aj + ai[i];
903           nz = ai[i+1] - ai[i];
904           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
905           while (nz--) {
906             idx = 6*(*vi++);
907             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
908             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
909             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
910             v  += 36;
911           }
912           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
913           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
914           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
915           idiag  -= 36;
916           i2     -= 6;
917         }
918         break;
919       case 7:
920         for (i=m-1; i>=0; i--) {
921           v  = aa + 49*ai[i];
922           vi = aj + ai[i];
923           nz = ai[i+1] - ai[i];
924           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
925           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
926           while (nz--) {
927             idx = 7*(*vi++);
928             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
929             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
930             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
931             v  += 49;
932           }
933           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
934           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
935           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
936           idiag  -= 49;
937           i2     -= 7;
938         }
939         break;
940       default:
941         for (i=m-1; i>=0; i--) {
942           v  = aa + bs2*ai[i];
943           vi = aj + ai[i];
944           nz = ai[i+1] - ai[i];
945 
946           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
947           /* copy all rows of x that are needed into contiguous space */
948           workt = work;
949           for (j=0; j<nz; j++) {
950             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
951             workt += bs;
952           }
953           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
954           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
955 
956           idiag -= bs2;
957           i2    -= bs;
958         }
959         break;
960       }
961       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
962     }
963   }
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 
970 /*
971     Special version for direct calls from Fortran (Used in PETSc-fun3d)
972 */
973 #if defined(PETSC_HAVE_FORTRAN_CAPS)
974 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
975 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
976 #define matsetvaluesblocked4_ matsetvaluesblocked4
977 #endif
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "matsetvaluesblocked4_"
981 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
982 {
983   Mat               A  = *AA;
984   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
985   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
986   PetscInt          *ai    =a->i,*ailen=a->ilen;
987   PetscInt          *aj    =a->j,stepval,lastcol = -1;
988   const PetscScalar *value = v;
989   MatScalar         *ap,*aa = a->a,*bap;
990 
991   PetscFunctionBegin;
992   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
993   stepval = (n-1)*4;
994   for (k=0; k<m; k++) { /* loop over added rows */
995     row  = im[k];
996     rp   = aj + ai[row];
997     ap   = aa + 16*ai[row];
998     nrow = ailen[row];
999     low  = 0;
1000     high = nrow;
1001     for (l=0; l<n; l++) { /* loop over added columns */
1002       col = in[l];
1003       if (col <= lastcol)  low = 0;
1004       else                high = nrow;
1005       lastcol = col;
1006       value   = v + k*(stepval+4 + l)*4;
1007       while (high-low > 7) {
1008         t = (low+high)/2;
1009         if (rp[t] > col) high = t;
1010         else             low  = t;
1011       }
1012       for (i=low; i<high; i++) {
1013         if (rp[i] > col) break;
1014         if (rp[i] == col) {
1015           bap = ap +  16*i;
1016           for (ii=0; ii<4; ii++,value+=stepval) {
1017             for (jj=ii; jj<16; jj+=4) {
1018               bap[jj] += *value++;
1019             }
1020           }
1021           goto noinsert2;
1022         }
1023       }
1024       N = nrow++ - 1;
1025       high++; /* added new column index thus must search to one higher than before */
1026       /* shift up all the later entries in this row */
1027       for (ii=N; ii>=i; ii--) {
1028         rp[ii+1] = rp[ii];
1029         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1030       }
1031       if (N >= i) {
1032         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1033       }
1034       rp[i] = col;
1035       bap   = ap +  16*i;
1036       for (ii=0; ii<4; ii++,value+=stepval) {
1037         for (jj=ii; jj<16; jj+=4) {
1038           bap[jj] = *value++;
1039         }
1040       }
1041       noinsert2:;
1042       low = i;
1043     }
1044     ailen[row] = nrow;
1045   }
1046   PetscFunctionReturnVoid();
1047 }
1048 
1049 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1050 #define matsetvalues4_ MATSETVALUES4
1051 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1052 #define matsetvalues4_ matsetvalues4
1053 #endif
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatSetValues4_"
1057 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1058 {
1059   Mat         A  = *AA;
1060   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1061   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1062   PetscInt    *ai=a->i,*ailen=a->ilen;
1063   PetscInt    *aj=a->j,brow,bcol;
1064   PetscInt    ridx,cidx,lastcol = -1;
1065   MatScalar   *ap,value,*aa=a->a,*bap;
1066 
1067   PetscFunctionBegin;
1068   for (k=0; k<m; k++) { /* loop over added rows */
1069     row  = im[k]; brow = row/4;
1070     rp   = aj + ai[brow];
1071     ap   = aa + 16*ai[brow];
1072     nrow = ailen[brow];
1073     low  = 0;
1074     high = nrow;
1075     for (l=0; l<n; l++) { /* loop over added columns */
1076       col   = in[l]; bcol = col/4;
1077       ridx  = row % 4; cidx = col % 4;
1078       value = v[l + k*n];
1079       if (col <= lastcol)  low = 0;
1080       else                high = nrow;
1081       lastcol = col;
1082       while (high-low > 7) {
1083         t = (low+high)/2;
1084         if (rp[t] > bcol) high = t;
1085         else              low  = t;
1086       }
1087       for (i=low; i<high; i++) {
1088         if (rp[i] > bcol) break;
1089         if (rp[i] == bcol) {
1090           bap   = ap +  16*i + 4*cidx + ridx;
1091           *bap += value;
1092           goto noinsert1;
1093         }
1094       }
1095       N = nrow++ - 1;
1096       high++; /* added new column thus must search to one higher than before */
1097       /* shift up all the later entries in this row */
1098       for (ii=N; ii>=i; ii--) {
1099         rp[ii+1] = rp[ii];
1100         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1101       }
1102       if (N>=i) {
1103         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1104       }
1105       rp[i]                    = bcol;
1106       ap[16*i + 4*cidx + ridx] = value;
1107 noinsert1:;
1108       low = i;
1109     }
1110     ailen[brow] = nrow;
1111   }
1112   PetscFunctionReturnVoid();
1113 }
1114 
1115 /*
1116      Checks for missing diagonals
1117 */
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1120 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1121 {
1122   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscInt       *diag,*jj = a->j,i;
1125 
1126   PetscFunctionBegin;
1127   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1128   *missing = PETSC_FALSE;
1129   if (A->rmap->n > 0 && !jj) {
1130     *missing = PETSC_TRUE;
1131     if (d) *d = 0;
1132     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1133   } else {
1134     diag = a->diag;
1135     for (i=0; i<a->mbs; i++) {
1136       if (jj[diag[i]] != i) {
1137         *missing = PETSC_TRUE;
1138         if (d) *d = i;
1139         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1140         break;
1141       }
1142     }
1143   }
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1149 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1150 {
1151   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1152   PetscErrorCode ierr;
1153   PetscInt       i,j,m = a->mbs;
1154 
1155   PetscFunctionBegin;
1156   if (!a->diag) {
1157     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1158     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1159     a->free_diag = PETSC_TRUE;
1160   }
1161   for (i=0; i<m; i++) {
1162     a->diag[i] = a->i[i+1];
1163     for (j=a->i[i]; j<a->i[i+1]; j++) {
1164       if (a->j[j] == i) {
1165         a->diag[i] = j;
1166         break;
1167       }
1168     }
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1176 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1177 {
1178   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1179   PetscErrorCode ierr;
1180   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1181   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1182 
1183   PetscFunctionBegin;
1184   *nn = n;
1185   if (!ia) PetscFunctionReturn(0);
1186   if (symmetric) {
1187     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1188     nz   = tia[n];
1189   } else {
1190     tia = a->i; tja = a->j;
1191   }
1192 
1193   if (!blockcompressed && bs > 1) {
1194     (*nn) *= bs;
1195     /* malloc & create the natural set of indices */
1196     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1197     if (n) {
1198       (*ia)[0] = 0;
1199       for (j=1; j<bs; j++) {
1200         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1201       }
1202     }
1203 
1204     for (i=1; i<n; i++) {
1205       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1206       for (j=1; j<bs; j++) {
1207         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1208       }
1209     }
1210     if (n) {
1211       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1212     }
1213 
1214     if (inja) {
1215       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1216       cnt = 0;
1217       for (i=0; i<n; i++) {
1218         for (j=0; j<bs; j++) {
1219           for (k=tia[i]; k<tia[i+1]; k++) {
1220             for (l=0; l<bs; l++) {
1221               (*ja)[cnt++] = bs*tja[k] + l;
1222             }
1223           }
1224         }
1225       }
1226     }
1227 
1228     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1229       ierr = PetscFree(tia);CHKERRQ(ierr);
1230       ierr = PetscFree(tja);CHKERRQ(ierr);
1231     }
1232   } else if (oshift == 1) {
1233     if (symmetric) {
1234       nz = tia[A->rmap->n/bs];
1235       /*  add 1 to i and j indices */
1236       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1237       *ia = tia;
1238       if (ja) {
1239         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1240         *ja = tja;
1241       }
1242     } else {
1243       nz = a->i[A->rmap->n/bs];
1244       /* malloc space and  add 1 to i and j indices */
1245       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1246       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1247       if (ja) {
1248         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1249         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1250       }
1251     }
1252   } else {
1253     *ia = tia;
1254     if (ja) *ja = tja;
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1261 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1262 {
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   if (!ia) PetscFunctionReturn(0);
1267   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1268     ierr = PetscFree(*ia);CHKERRQ(ierr);
1269     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1276 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1277 {
1278   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282 #if defined(PETSC_USE_LOG)
1283   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1284 #endif
1285   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1286   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1287   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1288   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1289   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1290   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1291   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1292   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1293   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1294   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1295   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1296   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1297   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1298 
1299   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1300   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1301   ierr = PetscFree(A->data);CHKERRQ(ierr);
1302 
1303   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1319 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1320 {
1321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323 
1324   PetscFunctionBegin;
1325   switch (op) {
1326   case MAT_ROW_ORIENTED:
1327     a->roworiented = flg;
1328     break;
1329   case MAT_KEEP_NONZERO_PATTERN:
1330     a->keepnonzeropattern = flg;
1331     break;
1332   case MAT_NEW_NONZERO_LOCATIONS:
1333     a->nonew = (flg ? 0 : 1);
1334     break;
1335   case MAT_NEW_NONZERO_LOCATION_ERR:
1336     a->nonew = (flg ? -1 : 0);
1337     break;
1338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1339     a->nonew = (flg ? -2 : 0);
1340     break;
1341   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1342     a->nounused = (flg ? -1 : 0);
1343     break;
1344   case MAT_CHECK_COMPRESSED_ROW:
1345     a->compressedrow.check = flg;
1346     break;
1347   case MAT_NEW_DIAGONALS:
1348   case MAT_IGNORE_OFF_PROC_ENTRIES:
1349   case MAT_USE_HASH_TABLE:
1350     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1351     break;
1352   case MAT_SPD:
1353   case MAT_SYMMETRIC:
1354   case MAT_STRUCTURALLY_SYMMETRIC:
1355   case MAT_HERMITIAN:
1356   case MAT_SYMMETRY_ETERNAL:
1357     /* These options are handled directly by MatSetOption() */
1358     break;
1359   default:
1360     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1367 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1368 {
1369   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1370   PetscErrorCode ierr;
1371   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1372   MatScalar      *aa,*aa_i;
1373   PetscScalar    *v_i;
1374 
1375   PetscFunctionBegin;
1376   bs  = A->rmap->bs;
1377   ai  = a->i;
1378   aj  = a->j;
1379   aa  = a->a;
1380   bs2 = a->bs2;
1381 
1382   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1383 
1384   bn  = row/bs;   /* Block number */
1385   bp  = row % bs; /* Block Position */
1386   M   = ai[bn+1] - ai[bn];
1387   *nz = bs*M;
1388 
1389   if (v) {
1390     *v = 0;
1391     if (*nz) {
1392       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1393       for (i=0; i<M; i++) { /* for each block in the block row */
1394         v_i  = *v + i*bs;
1395         aa_i = aa + bs2*(ai[bn] + i);
1396         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1397       }
1398     }
1399   }
1400 
1401   if (idx) {
1402     *idx = 0;
1403     if (*nz) {
1404       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1405       for (i=0; i<M; i++) { /* for each block in the block row */
1406         idx_i = *idx + i*bs;
1407         itmp  = bs*aj[ai[bn] + i];
1408         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1409       }
1410     }
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1417 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1418 {
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1423   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1431 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1432 {
1433   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1434   Mat            C;
1435   PetscErrorCode ierr;
1436   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1437   PetscInt       *rows,*cols,bs2=a->bs2;
1438   MatScalar      *array;
1439 
1440   PetscFunctionBegin;
1441   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1443     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1444     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1445 
1446     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1447     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1448     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1449     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1450     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1451     ierr = PetscFree(col);CHKERRQ(ierr);
1452   } else {
1453     C = *B;
1454   }
1455 
1456   array = a->a;
1457   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1458   for (i=0; i<mbs; i++) {
1459     cols[0] = i*bs;
1460     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1461     len = ai[i+1] - ai[i];
1462     for (j=0; j<len; j++) {
1463       rows[0] = (*aj++)*bs;
1464       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1465       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1466       array += bs2;
1467     }
1468   }
1469   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1470 
1471   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1472   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1473 
1474   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1475     *B = C;
1476   } else {
1477     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1484 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1485 {
1486   PetscErrorCode ierr;
1487   Mat            Btrans;
1488 
1489   PetscFunctionBegin;
1490   *f   = PETSC_FALSE;
1491   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1492   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1493   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1499 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1500 {
1501   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1502   PetscErrorCode ierr;
1503   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1504   int            fd;
1505   PetscScalar    *aa;
1506   FILE           *file;
1507 
1508   PetscFunctionBegin;
1509   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1510   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1511   col_lens[0] = MAT_FILE_CLASSID;
1512 
1513   col_lens[1] = A->rmap->N;
1514   col_lens[2] = A->cmap->n;
1515   col_lens[3] = a->nz*bs2;
1516 
1517   /* store lengths of each row and write (including header) to file */
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1522     }
1523   }
1524   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1525   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1526 
1527   /* store column indices (zero start index) */
1528   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1529   count = 0;
1530   for (i=0; i<a->mbs; i++) {
1531     for (j=0; j<bs; j++) {
1532       for (k=a->i[i]; k<a->i[i+1]; k++) {
1533         for (l=0; l<bs; l++) {
1534           jj[count++] = bs*a->j[k] + l;
1535         }
1536       }
1537     }
1538   }
1539   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1540   ierr = PetscFree(jj);CHKERRQ(ierr);
1541 
1542   /* store nonzero values */
1543   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1544   count = 0;
1545   for (i=0; i<a->mbs; i++) {
1546     for (j=0; j<bs; j++) {
1547       for (k=a->i[i]; k<a->i[i+1]; k++) {
1548         for (l=0; l<bs; l++) {
1549           aa[count++] = a->a[bs2*k + l*bs + j];
1550         }
1551       }
1552     }
1553   }
1554   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1555   ierr = PetscFree(aa);CHKERRQ(ierr);
1556 
1557   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1558   if (file) {
1559     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1566 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1567 {
1568   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1569   PetscErrorCode    ierr;
1570   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1571   PetscViewerFormat format;
1572 
1573   PetscFunctionBegin;
1574   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1575   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1576     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1577   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1578     Mat aij;
1579     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1580     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1581     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1582   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1583       PetscFunctionReturn(0);
1584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1586     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1587     for (i=0; i<a->mbs; i++) {
1588       for (j=0; j<bs; j++) {
1589         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1590         for (k=a->i[i]; k<a->i[i+1]; k++) {
1591           for (l=0; l<bs; l++) {
1592 #if defined(PETSC_USE_COMPLEX)
1593             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1594               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1595                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1596             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1597               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1598                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1599             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1600               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1601             }
1602 #else
1603             if (a->a[bs2*k + l*bs + j] != 0.0) {
1604               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605             }
1606 #endif
1607           }
1608         }
1609         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1610       }
1611     }
1612     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1613   } else {
1614     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1615     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1616     for (i=0; i<a->mbs; i++) {
1617       for (j=0; j<bs; j++) {
1618         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1619         for (k=a->i[i]; k<a->i[i+1]; k++) {
1620           for (l=0; l<bs; l++) {
1621 #if defined(PETSC_USE_COMPLEX)
1622             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1623               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1624                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1625             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1626               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1627                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1628             } else {
1629               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1630             }
1631 #else
1632             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1633 #endif
1634           }
1635         }
1636         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1637       }
1638     }
1639     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1640   }
1641   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #include <petscdraw.h>
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1648 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1649 {
1650   Mat               A = (Mat) Aa;
1651   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1652   PetscErrorCode    ierr;
1653   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1654   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1655   MatScalar         *aa;
1656   PetscViewer       viewer;
1657   PetscViewerFormat format;
1658 
1659   PetscFunctionBegin;
1660   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1661   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1662 
1663   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1664 
1665   /* loop over matrix elements drawing boxes */
1666 
1667   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1668     color = PETSC_DRAW_BLUE;
1669     for (i=0,row=0; i<mbs; i++,row+=bs) {
1670       for (j=a->i[i]; j<a->i[i+1]; j++) {
1671         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1672         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1673         aa  = a->a + j*bs2;
1674         for (k=0; k<bs; k++) {
1675           for (l=0; l<bs; l++) {
1676             if (PetscRealPart(*aa++) >=  0.) continue;
1677             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1678           }
1679         }
1680       }
1681     }
1682     color = PETSC_DRAW_CYAN;
1683     for (i=0,row=0; i<mbs; i++,row+=bs) {
1684       for (j=a->i[i]; j<a->i[i+1]; j++) {
1685         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1686         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1687         aa  = a->a + j*bs2;
1688         for (k=0; k<bs; k++) {
1689           for (l=0; l<bs; l++) {
1690             if (PetscRealPart(*aa++) != 0.) continue;
1691             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1692           }
1693         }
1694       }
1695     }
1696     color = PETSC_DRAW_RED;
1697     for (i=0,row=0; i<mbs; i++,row+=bs) {
1698       for (j=a->i[i]; j<a->i[i+1]; j++) {
1699         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1700         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1701         aa  = a->a + j*bs2;
1702         for (k=0; k<bs; k++) {
1703           for (l=0; l<bs; l++) {
1704             if (PetscRealPart(*aa++) <= 0.) continue;
1705             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1706           }
1707         }
1708       }
1709     }
1710   } else {
1711     /* use contour shading to indicate magnitude of values */
1712     /* first determine max of all nonzero values */
1713     PetscDraw popup;
1714     PetscReal scale,maxv = 0.0;
1715 
1716     for (i=0; i<a->nz*a->bs2; i++) {
1717       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1718     }
1719     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1720     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1721     if (popup) {
1722       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1723     }
1724     for (i=0,row=0; i<mbs; i++,row+=bs) {
1725       for (j=a->i[i]; j<a->i[i+1]; j++) {
1726         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1727         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1728         aa  = a->a + j*bs2;
1729         for (k=0; k<bs; k++) {
1730           for (l=0; l<bs; l++) {
1731             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1732             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1733           }
1734         }
1735       }
1736     }
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1743 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1744 {
1745   PetscErrorCode ierr;
1746   PetscReal      xl,yl,xr,yr,w,h;
1747   PetscDraw      draw;
1748   PetscBool      isnull;
1749 
1750   PetscFunctionBegin;
1751   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1752   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1753 
1754   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1755   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1756   xr  += w;    yr += h;  xl = -w;     yl = -h;
1757   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1758   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1759   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatView_SeqBAIJ"
1765 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1766 {
1767   PetscErrorCode ierr;
1768   PetscBool      iascii,isbinary,isdraw;
1769 
1770   PetscFunctionBegin;
1771   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1772   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1773   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1774   if (iascii) {
1775     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1776   } else if (isbinary) {
1777     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1778   } else if (isdraw) {
1779     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1780   } else {
1781     Mat B;
1782     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1783     ierr = MatView(B,viewer);CHKERRQ(ierr);
1784     ierr = MatDestroy(&B);CHKERRQ(ierr);
1785   }
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1792 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1793 {
1794   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1795   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1796   PetscInt    *ai = a->i,*ailen = a->ilen;
1797   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1798   MatScalar   *ap,*aa = a->a;
1799 
1800   PetscFunctionBegin;
1801   for (k=0; k<m; k++) { /* loop over rows */
1802     row = im[k]; brow = row/bs;
1803     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1804     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1805     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1806     nrow = ailen[brow];
1807     for (l=0; l<n; l++) { /* loop over columns */
1808       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1809       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1810       col  = in[l];
1811       bcol = col/bs;
1812       cidx = col%bs;
1813       ridx = row%bs;
1814       high = nrow;
1815       low  = 0; /* assume unsorted */
1816       while (high-low > 5) {
1817         t = (low+high)/2;
1818         if (rp[t] > bcol) high = t;
1819         else             low  = t;
1820       }
1821       for (i=low; i<high; i++) {
1822         if (rp[i] > bcol) break;
1823         if (rp[i] == bcol) {
1824           *v++ = ap[bs2*i+bs*cidx+ridx];
1825           goto finished;
1826         }
1827       }
1828       *v++ = 0.0;
1829 finished:;
1830     }
1831   }
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1837 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1838 {
1839   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1840   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1841   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1842   PetscErrorCode    ierr;
1843   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1844   PetscBool         roworiented=a->roworiented;
1845   const PetscScalar *value     = v;
1846   MatScalar         *ap,*aa = a->a,*bap;
1847 
1848   PetscFunctionBegin;
1849   if (roworiented) {
1850     stepval = (n-1)*bs;
1851   } else {
1852     stepval = (m-1)*bs;
1853   }
1854   for (k=0; k<m; k++) { /* loop over added rows */
1855     row = im[k];
1856     if (row < 0) continue;
1857 #if defined(PETSC_USE_DEBUG)
1858     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1859 #endif
1860     rp   = aj + ai[row];
1861     ap   = aa + bs2*ai[row];
1862     rmax = imax[row];
1863     nrow = ailen[row];
1864     low  = 0;
1865     high = nrow;
1866     for (l=0; l<n; l++) { /* loop over added columns */
1867       if (in[l] < 0) continue;
1868 #if defined(PETSC_USE_DEBUG)
1869       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1870 #endif
1871       col = in[l];
1872       if (roworiented) {
1873         value = v + (k*(stepval+bs) + l)*bs;
1874       } else {
1875         value = v + (l*(stepval+bs) + k)*bs;
1876       }
1877       if (col <= lastcol) low = 0;
1878       else high = nrow;
1879       lastcol = col;
1880       while (high-low > 7) {
1881         t = (low+high)/2;
1882         if (rp[t] > col) high = t;
1883         else             low  = t;
1884       }
1885       for (i=low; i<high; i++) {
1886         if (rp[i] > col) break;
1887         if (rp[i] == col) {
1888           bap = ap +  bs2*i;
1889           if (roworiented) {
1890             if (is == ADD_VALUES) {
1891               for (ii=0; ii<bs; ii++,value+=stepval) {
1892                 for (jj=ii; jj<bs2; jj+=bs) {
1893                   bap[jj] += *value++;
1894                 }
1895               }
1896             } else {
1897               for (ii=0; ii<bs; ii++,value+=stepval) {
1898                 for (jj=ii; jj<bs2; jj+=bs) {
1899                   bap[jj] = *value++;
1900                 }
1901               }
1902             }
1903           } else {
1904             if (is == ADD_VALUES) {
1905               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1906                 for (jj=0; jj<bs; jj++) {
1907                   bap[jj] += value[jj];
1908                 }
1909                 bap += bs;
1910               }
1911             } else {
1912               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1913                 for (jj=0; jj<bs; jj++) {
1914                   bap[jj]  = value[jj];
1915                 }
1916                 bap += bs;
1917               }
1918             }
1919           }
1920           goto noinsert2;
1921         }
1922       }
1923       if (nonew == 1) goto noinsert2;
1924       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1925       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1926       N = nrow++ - 1; high++;
1927       /* shift up all the later entries in this row */
1928       for (ii=N; ii>=i; ii--) {
1929         rp[ii+1] = rp[ii];
1930         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1931       }
1932       if (N >= i) {
1933         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1934       }
1935       rp[i] = col;
1936       bap   = ap +  bs2*i;
1937       if (roworiented) {
1938         for (ii=0; ii<bs; ii++,value+=stepval) {
1939           for (jj=ii; jj<bs2; jj+=bs) {
1940             bap[jj] = *value++;
1941           }
1942         }
1943       } else {
1944         for (ii=0; ii<bs; ii++,value+=stepval) {
1945           for (jj=0; jj<bs; jj++) {
1946             *bap++ = *value++;
1947           }
1948         }
1949       }
1950 noinsert2:;
1951       low = i;
1952     }
1953     ailen[row] = nrow;
1954   }
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1960 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1961 {
1962   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1963   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1964   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1965   PetscErrorCode ierr;
1966   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1967   MatScalar      *aa  = a->a,*ap;
1968   PetscReal      ratio=0.6;
1969 
1970   PetscFunctionBegin;
1971   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1972 
1973   if (m) rmax = ailen[0];
1974   for (i=1; i<mbs; i++) {
1975     /* move each row back by the amount of empty slots (fshift) before it*/
1976     fshift += imax[i-1] - ailen[i-1];
1977     rmax    = PetscMax(rmax,ailen[i]);
1978     if (fshift) {
1979       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1980       N  = ailen[i];
1981       for (j=0; j<N; j++) {
1982         ip[j-fshift] = ip[j];
1983 
1984         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1985       }
1986     }
1987     ai[i] = ai[i-1] + ailen[i-1];
1988   }
1989   if (mbs) {
1990     fshift += imax[mbs-1] - ailen[mbs-1];
1991     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1992   }
1993   /* reset ilen and imax for each row */
1994   for (i=0; i<mbs; i++) {
1995     ailen[i] = imax[i] = ai[i+1] - ai[i];
1996   }
1997   a->nz = ai[mbs];
1998 
1999   /* diagonals may have moved, so kill the diagonal pointers */
2000   a->idiagvalid = PETSC_FALSE;
2001   if (fshift && a->diag) {
2002     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2003     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2004     a->diag = 0;
2005   }
2006   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2007   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2008   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2009   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2010 
2011   A->info.mallocs    += a->reallocs;
2012   a->reallocs         = 0;
2013   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2014 
2015   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2016 
2017   A->same_nonzero = PETSC_TRUE;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 /*
2022    This function returns an array of flags which indicate the locations of contiguous
2023    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2024    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2025    Assume: sizes should be long enough to hold all the values.
2026 */
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2029 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2030 {
2031   PetscInt  i,j,k,row;
2032   PetscBool flg;
2033 
2034   PetscFunctionBegin;
2035   for (i=0,j=0; i<n; j++) {
2036     row = idx[i];
2037     if (row%bs!=0) { /* Not the begining of a block */
2038       sizes[j] = 1;
2039       i++;
2040     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2041       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2042       i++;
2043     } else { /* Begining of the block, so check if the complete block exists */
2044       flg = PETSC_TRUE;
2045       for (k=1; k<bs; k++) {
2046         if (row+k != idx[i+k]) { /* break in the block */
2047           flg = PETSC_FALSE;
2048           break;
2049         }
2050       }
2051       if (flg) { /* No break in the bs */
2052         sizes[j] = bs;
2053         i       += bs;
2054       } else {
2055         sizes[j] = 1;
2056         i++;
2057       }
2058     }
2059   }
2060   *bs_max = j;
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2066 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2067 {
2068   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2069   PetscErrorCode    ierr;
2070   PetscInt          i,j,k,count,*rows;
2071   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2072   PetscScalar       zero = 0.0;
2073   MatScalar         *aa;
2074   const PetscScalar *xx;
2075   PetscScalar       *bb;
2076 
2077   PetscFunctionBegin;
2078   /* fix right hand side if needed */
2079   if (x && b) {
2080     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2082     for (i=0; i<is_n; i++) {
2083       bb[is_idx[i]] = diag*xx[is_idx[i]];
2084     }
2085     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2086     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2087   }
2088 
2089   /* Make a copy of the IS and  sort it */
2090   /* allocate memory for rows,sizes */
2091   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2092 
2093   /* copy IS values to rows, and sort them */
2094   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2095   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2096 
2097   if (baij->keepnonzeropattern) {
2098     for (i=0; i<is_n; i++) sizes[i] = 1;
2099     bs_max          = is_n;
2100     A->same_nonzero = PETSC_TRUE;
2101   } else {
2102     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2103 
2104     A->same_nonzero = PETSC_FALSE;
2105   }
2106 
2107   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2108     row = rows[j];
2109     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2110     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2111     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2112     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2113       if (diag != (PetscScalar)0.0) {
2114         if (baij->ilen[row/bs] > 0) {
2115           baij->ilen[row/bs]       = 1;
2116           baij->j[baij->i[row/bs]] = row/bs;
2117 
2118           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2119         }
2120         /* Now insert all the diagonal values for this bs */
2121         for (k=0; k<bs; k++) {
2122           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2123         }
2124       } else { /* (diag == 0.0) */
2125         baij->ilen[row/bs] = 0;
2126       } /* end (diag == 0.0) */
2127     } else { /* (sizes[i] != bs) */
2128 #if defined(PETSC_USE_DEBUG)
2129       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2130 #endif
2131       for (k=0; k<count; k++) {
2132         aa[0] =  zero;
2133         aa   += bs;
2134       }
2135       if (diag != (PetscScalar)0.0) {
2136         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2137       }
2138     }
2139   }
2140 
2141   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2142   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2148 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2149 {
2150   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2151   PetscErrorCode    ierr;
2152   PetscInt          i,j,k,count;
2153   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2154   PetscScalar       zero = 0.0;
2155   MatScalar         *aa;
2156   const PetscScalar *xx;
2157   PetscScalar       *bb;
2158   PetscBool         *zeroed,vecs = PETSC_FALSE;
2159 
2160   PetscFunctionBegin;
2161   /* fix right hand side if needed */
2162   if (x && b) {
2163     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2164     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2165     vecs = PETSC_TRUE;
2166   }
2167   A->same_nonzero = PETSC_TRUE;
2168 
2169   /* zero the columns */
2170   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2171   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2172   for (i=0; i<is_n; i++) {
2173     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2174     zeroed[is_idx[i]] = PETSC_TRUE;
2175   }
2176   for (i=0; i<A->rmap->N; i++) {
2177     if (!zeroed[i]) {
2178       row = i/bs;
2179       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2180         for (k=0; k<bs; k++) {
2181           col = bs*baij->j[j] + k;
2182           if (zeroed[col]) {
2183             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2184             if (vecs) bb[i] -= aa[0]*xx[col];
2185             aa[0] = 0.0;
2186           }
2187         }
2188       }
2189     } else if (vecs) bb[i] = diag*xx[i];
2190   }
2191   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2192   if (vecs) {
2193     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2194     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2195   }
2196 
2197   /* zero the rows */
2198   for (i=0; i<is_n; i++) {
2199     row   = is_idx[i];
2200     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2201     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2202     for (k=0; k<count; k++) {
2203       aa[0] =  zero;
2204       aa   += bs;
2205     }
2206     if (diag != (PetscScalar)0.0) {
2207       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2208     }
2209   }
2210   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2216 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2217 {
2218   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2219   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2220   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2221   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2222   PetscErrorCode ierr;
2223   PetscInt       ridx,cidx,bs2=a->bs2;
2224   PetscBool      roworiented=a->roworiented;
2225   MatScalar      *ap,value,*aa=a->a,*bap;
2226 
2227   PetscFunctionBegin;
2228   if (v) PetscValidScalarPointer(v,6);
2229   for (k=0; k<m; k++) { /* loop over added rows */
2230     row  = im[k];
2231     brow = row/bs;
2232     if (row < 0) continue;
2233 #if defined(PETSC_USE_DEBUG)
2234     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2235 #endif
2236     rp   = aj + ai[brow];
2237     ap   = aa + bs2*ai[brow];
2238     rmax = imax[brow];
2239     nrow = ailen[brow];
2240     low  = 0;
2241     high = nrow;
2242     for (l=0; l<n; l++) { /* loop over added columns */
2243       if (in[l] < 0) continue;
2244 #if defined(PETSC_USE_DEBUG)
2245       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2246 #endif
2247       col  = in[l]; bcol = col/bs;
2248       ridx = row % bs; cidx = col % bs;
2249       if (roworiented) {
2250         value = v[l + k*n];
2251       } else {
2252         value = v[k + l*m];
2253       }
2254       if (col <= lastcol) low = 0; else high = nrow;
2255       lastcol = col;
2256       while (high-low > 7) {
2257         t = (low+high)/2;
2258         if (rp[t] > bcol) high = t;
2259         else              low  = t;
2260       }
2261       for (i=low; i<high; i++) {
2262         if (rp[i] > bcol) break;
2263         if (rp[i] == bcol) {
2264           bap = ap +  bs2*i + bs*cidx + ridx;
2265           if (is == ADD_VALUES) *bap += value;
2266           else                  *bap  = value;
2267           goto noinsert1;
2268         }
2269       }
2270       if (nonew == 1) goto noinsert1;
2271       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2272       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2273       N = nrow++ - 1; high++;
2274       /* shift up all the later entries in this row */
2275       for (ii=N; ii>=i; ii--) {
2276         rp[ii+1] = rp[ii];
2277         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2278       }
2279       if (N>=i) {
2280         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2281       }
2282       rp[i]                      = bcol;
2283       ap[bs2*i + bs*cidx + ridx] = value;
2284       a->nz++;
2285 noinsert1:;
2286       low = i;
2287     }
2288     ailen[brow] = nrow;
2289   }
2290   A->same_nonzero = PETSC_FALSE;
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2296 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2297 {
2298   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2299   Mat            outA;
2300   PetscErrorCode ierr;
2301   PetscBool      row_identity,col_identity;
2302 
2303   PetscFunctionBegin;
2304   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2305   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2306   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2307   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2308 
2309   outA            = inA;
2310   inA->factortype = MAT_FACTOR_LU;
2311 
2312   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2313 
2314   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2315   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2316   a->row = row;
2317   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2318   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2319   a->col = col;
2320 
2321   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2322   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2323   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2324   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2325 
2326   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2327   if (!a->solve_work) {
2328     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2329     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2330   }
2331   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2337 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2338 {
2339   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2340   PetscInt    i,nz,mbs;
2341 
2342   PetscFunctionBegin;
2343   nz  = baij->maxnz;
2344   mbs = baij->mbs;
2345   for (i=0; i<nz; i++) {
2346     baij->j[i] = indices[i];
2347   }
2348   baij->nz = nz;
2349   for (i=0; i<mbs; i++) {
2350     baij->ilen[i] = baij->imax[i];
2351   }
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2357 /*@
2358     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2359        in the matrix.
2360 
2361   Input Parameters:
2362 +  mat - the SeqBAIJ matrix
2363 -  indices - the column indices
2364 
2365   Level: advanced
2366 
2367   Notes:
2368     This can be called if you have precomputed the nonzero structure of the
2369   matrix and want to provide it to the matrix object to improve the performance
2370   of the MatSetValues() operation.
2371 
2372     You MUST have set the correct numbers of nonzeros per row in the call to
2373   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2374 
2375     MUST be called before any calls to MatSetValues();
2376 
2377 @*/
2378 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2379 {
2380   PetscErrorCode ierr;
2381 
2382   PetscFunctionBegin;
2383   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2384   PetscValidPointer(indices,2);
2385   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2391 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2392 {
2393   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2394   PetscErrorCode ierr;
2395   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2396   PetscReal      atmp;
2397   PetscScalar    *x,zero = 0.0;
2398   MatScalar      *aa;
2399   PetscInt       ncols,brow,krow,kcol;
2400 
2401   PetscFunctionBegin;
2402   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2403   bs  = A->rmap->bs;
2404   aa  = a->a;
2405   ai  = a->i;
2406   aj  = a->j;
2407   mbs = a->mbs;
2408 
2409   ierr = VecSet(v,zero);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2411   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2412   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2413   for (i=0; i<mbs; i++) {
2414     ncols = ai[1] - ai[0]; ai++;
2415     brow  = bs*i;
2416     for (j=0; j<ncols; j++) {
2417       for (kcol=0; kcol<bs; kcol++) {
2418         for (krow=0; krow<bs; krow++) {
2419           atmp = PetscAbsScalar(*aa);aa++;
2420           row  = brow + krow;   /* row index */
2421           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2422           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2423         }
2424       }
2425       aj++;
2426     }
2427   }
2428   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatCopy_SeqBAIJ"
2434 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2435 {
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   /* If the two matrices have the same copy implementation, use fast copy. */
2440   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2441     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2442     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2443     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2444 
2445     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2446     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2447     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2448   } else {
2449     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2456 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2457 {
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2467 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2468 {
2469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470 
2471   PetscFunctionBegin;
2472   *array = a->a;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2478 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2479 {
2480   PetscFunctionBegin;
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2486 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2487 {
2488   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2489   PetscErrorCode ierr;
2490   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2491   PetscBLASInt   one=1;
2492 
2493   PetscFunctionBegin;
2494   if (str == SAME_NONZERO_PATTERN) {
2495     PetscScalar  alpha = a;
2496     PetscBLASInt bnz;
2497     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2498     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2499   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2500     if (y->xtoy && y->XtoY != X) {
2501       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2502       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2503     }
2504     if (!y->xtoy) { /* get xtoy */
2505       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2506       y->XtoY = X;
2507       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2508     }
2509     for (i=0; i<x->nz; i++) {
2510       j = 0;
2511       while (j < bs2) {
2512         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2513         j++;
2514       }
2515     }
2516     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2517   } else {
2518     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2519   }
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNCT__
2524 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2525 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2526 {
2527   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2528   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2529   MatScalar   *aa = a->a;
2530 
2531   PetscFunctionBegin;
2532   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2538 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2539 {
2540   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2541   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2542   MatScalar   *aa = a->a;
2543 
2544   PetscFunctionBegin;
2545   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2553 /*
2554     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2555 */
2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2557 {
2558   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2559   PetscErrorCode ierr;
2560   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2561   PetscInt       nz = a->i[m],row,*jj,mr,col;
2562 
2563   PetscFunctionBegin;
2564   *nn = n;
2565   if (!ia) PetscFunctionReturn(0);
2566   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567   else {
2568     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2569     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2570     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2571     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2572     jj   = a->j;
2573     for (i=0; i<nz; i++) {
2574       collengths[jj[i]]++;
2575     }
2576     cia[0] = oshift;
2577     for (i=0; i<n; i++) {
2578       cia[i+1] = cia[i] + collengths[i];
2579     }
2580     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2581     jj   = a->j;
2582     for (row=0; row<m; row++) {
2583       mr = a->i[row+1] - a->i[row];
2584       for (i=0; i<mr; i++) {
2585         col = *jj++;
2586 
2587         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2588       }
2589     }
2590     ierr = PetscFree(collengths);CHKERRQ(ierr);
2591     *ia  = cia; *ja = cja;
2592   }
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2598 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2599 {
2600   PetscErrorCode ierr;
2601 
2602   PetscFunctionBegin;
2603   if (!ia) PetscFunctionReturn(0);
2604   ierr = PetscFree(*ia);CHKERRQ(ierr);
2605   ierr = PetscFree(*ja);CHKERRQ(ierr);
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 #undef __FUNCT__
2610 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2611 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2612 {
2613   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2614   PetscErrorCode ierr;
2615   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2616   PetscScalar    dx,*y,*xx,*w3_array;
2617   PetscScalar    *vscale_array;
2618   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2619   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2620   void           *fctx   = coloring->fctx;
2621   PetscBool      flg     = PETSC_FALSE;
2622   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2623   Vec            x1_tmp;
2624 
2625   PetscFunctionBegin;
2626   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2627   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2628   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2629   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2630 
2631   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2632   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2633   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2634   if (flg) {
2635     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2636   } else {
2637     PetscBool assembled;
2638     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2639     if (assembled) {
2640       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2641     }
2642   }
2643 
2644   x1_tmp = x1;
2645   if (!coloring->vscale) {
2646     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2647   }
2648 
2649   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2650     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2651   }
2652   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2653 
2654   /* Set w1 = F(x1) */
2655   if (!coloring->fset) {
2656     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2657     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2658     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2659   } else {
2660     coloring->fset = PETSC_FALSE;
2661   }
2662 
2663   if (!coloring->w3) {
2664     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2665     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2666   }
2667   w3 = coloring->w3;
2668 
2669   /* Compute all the local scale factors, including ghost points */
2670   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2671   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2672   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2673   if (ctype == IS_COLORING_GHOSTED) {
2674     col_start = 0; col_end = N;
2675   } else if (ctype == IS_COLORING_GLOBAL) {
2676     xx           = xx - start;
2677     vscale_array = vscale_array - start;
2678     col_start    = start; col_end = N + start;
2679   }
2680   for (col=col_start; col<col_end; col++) {
2681     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2682     if (coloring->htype[0] == 'w') {
2683       dx = 1.0 + unorm;
2684     } else {
2685       dx = xx[col];
2686     }
2687     if (dx == (PetscScalar)0.0) dx = 1.0;
2688     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2689     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2690     dx               *= epsilon;
2691     vscale_array[col] = (PetscScalar)1.0/dx;
2692   }
2693   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2694   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2695   if (ctype == IS_COLORING_GLOBAL) {
2696     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698   }
2699   if (coloring->vscaleforrow) {
2700     vscaleforrow = coloring->vscaleforrow;
2701   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2702 
2703   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2704   /*
2705     Loop over each color
2706   */
2707   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2708   for (k=0; k<coloring->ncolors; k++) {
2709     coloring->currentcolor = k;
2710     for (i=0; i<bs; i++) {
2711       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2712       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2713       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2714       /*
2715         Loop over each column associated with color
2716         adding the perturbation to the vector w3.
2717       */
2718       for (l=0; l<coloring->ncolumns[k]; l++) {
2719         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2720         if (coloring->htype[0] == 'w') {
2721           dx = 1.0 + unorm;
2722         } else {
2723           dx = xx[col];
2724         }
2725         if (dx == (PetscScalar)0.0) dx = 1.0;
2726         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2727         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2728         dx *= epsilon;
2729         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2730         w3_array[col] += dx;
2731       }
2732       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2733       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2734 
2735       /*
2736         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2737         w2 = F(x1 + dx) - F(x1)
2738       */
2739       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2740       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2741       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2742       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2743 
2744       /*
2745         Loop over rows of vector, putting results into Jacobian matrix
2746       */
2747       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2748       for (l=0; l<coloring->nrows[k]; l++) {
2749         row = bs*coloring->rows[k][l];                /* local row index */
2750         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2751         for (j=0; j<bs; j++) {
2752           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2753           srows[j]  = row + start + j;
2754         }
2755         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2756       }
2757       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2758     }
2759   } /* endof for each color */
2760   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2761   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2762   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2763   ierr = PetscFree(srows);CHKERRQ(ierr);
2764 
2765   coloring->currentcolor = -1;
2766 
2767   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 /* -------------------------------------------------------------------*/
2774 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2775                                        MatGetRow_SeqBAIJ,
2776                                        MatRestoreRow_SeqBAIJ,
2777                                        MatMult_SeqBAIJ_N,
2778                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2779                                        MatMultTranspose_SeqBAIJ,
2780                                        MatMultTransposeAdd_SeqBAIJ,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                /* 10*/ 0,
2785                                        MatLUFactor_SeqBAIJ,
2786                                        0,
2787                                        0,
2788                                        MatTranspose_SeqBAIJ,
2789                                /* 15*/ MatGetInfo_SeqBAIJ,
2790                                        MatEqual_SeqBAIJ,
2791                                        MatGetDiagonal_SeqBAIJ,
2792                                        MatDiagonalScale_SeqBAIJ,
2793                                        MatNorm_SeqBAIJ,
2794                                /* 20*/ 0,
2795                                        MatAssemblyEnd_SeqBAIJ,
2796                                        MatSetOption_SeqBAIJ,
2797                                        MatZeroEntries_SeqBAIJ,
2798                                /* 24*/ MatZeroRows_SeqBAIJ,
2799                                        0,
2800                                        0,
2801                                        0,
2802                                        0,
2803                                /* 29*/ MatSetUp_SeqBAIJ,
2804                                        0,
2805                                        0,
2806                                        0,
2807                                        0,
2808                                /* 34*/ MatDuplicate_SeqBAIJ,
2809                                        0,
2810                                        0,
2811                                        MatILUFactor_SeqBAIJ,
2812                                        0,
2813                                /* 39*/ MatAXPY_SeqBAIJ,
2814                                        MatGetSubMatrices_SeqBAIJ,
2815                                        MatIncreaseOverlap_SeqBAIJ,
2816                                        MatGetValues_SeqBAIJ,
2817                                        MatCopy_SeqBAIJ,
2818                                /* 44*/ 0,
2819                                        MatScale_SeqBAIJ,
2820                                        0,
2821                                        0,
2822                                        MatZeroRowsColumns_SeqBAIJ,
2823                                /* 49*/ 0,
2824                                        MatGetRowIJ_SeqBAIJ,
2825                                        MatRestoreRowIJ_SeqBAIJ,
2826                                        MatGetColumnIJ_SeqBAIJ,
2827                                        MatRestoreColumnIJ_SeqBAIJ,
2828                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2829                                        0,
2830                                        0,
2831                                        0,
2832                                        MatSetValuesBlocked_SeqBAIJ,
2833                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2834                                        MatDestroy_SeqBAIJ,
2835                                        MatView_SeqBAIJ,
2836                                        0,
2837                                        0,
2838                                /* 64*/ 0,
2839                                        0,
2840                                        0,
2841                                        0,
2842                                        0,
2843                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2844                                        0,
2845                                        MatConvert_Basic,
2846                                        0,
2847                                        0,
2848                                /* 74*/ 0,
2849                                        MatFDColoringApply_BAIJ,
2850                                        0,
2851                                        0,
2852                                        0,
2853                                /* 79*/ 0,
2854                                        0,
2855                                        0,
2856                                        0,
2857                                        MatLoad_SeqBAIJ,
2858                                /* 84*/ 0,
2859                                        0,
2860                                        0,
2861                                        0,
2862                                        0,
2863                                /* 89*/ 0,
2864                                        0,
2865                                        0,
2866                                        0,
2867                                        0,
2868                                /* 94*/ 0,
2869                                        0,
2870                                        0,
2871                                        0,
2872                                        0,
2873                                /* 99*/ 0,
2874                                        0,
2875                                        0,
2876                                        0,
2877                                        0,
2878                                /*104*/ 0,
2879                                        MatRealPart_SeqBAIJ,
2880                                        MatImaginaryPart_SeqBAIJ,
2881                                        0,
2882                                        0,
2883                                /*109*/ 0,
2884                                        0,
2885                                        0,
2886                                        0,
2887                                        MatMissingDiagonal_SeqBAIJ,
2888                                /*114*/ 0,
2889                                        0,
2890                                        0,
2891                                        0,
2892                                        0,
2893                                /*119*/ 0,
2894                                        0,
2895                                        MatMultHermitianTranspose_SeqBAIJ,
2896                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2897                                        0,
2898                                /*124*/ 0,
2899                                        0,
2900                                        MatInvertBlockDiagonal_SeqBAIJ,
2901                                        0,
2902                                        0,
2903                                /*129*/ 0,
2904                                        0,
2905                                        0,
2906                                        0,
2907                                        0,
2908                                /*134*/ 0,
2909                                        0,
2910                                        0,
2911                                        0,
2912                                        0,
2913                                /*139*/ 0,
2914                                        0
2915 };
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2919 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2920 {
2921   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2922   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2923   PetscErrorCode ierr;
2924 
2925   PetscFunctionBegin;
2926   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2927 
2928   /* allocate space for values if not already there */
2929   if (!aij->saved_values) {
2930     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2931     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2932   }
2933 
2934   /* copy values over */
2935   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2941 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2942 {
2943   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2944   PetscErrorCode ierr;
2945   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2946 
2947   PetscFunctionBegin;
2948   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2949   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2950 
2951   /* copy values over */
2952   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2957 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2961 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2962 {
2963   Mat_SeqBAIJ    *b;
2964   PetscErrorCode ierr;
2965   PetscInt       i,mbs,nbs,bs2;
2966   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2967 
2968   PetscFunctionBegin;
2969   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2970   if (nz == MAT_SKIP_ALLOCATION) {
2971     skipallocation = PETSC_TRUE;
2972     nz             = 0;
2973   }
2974 
2975   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2976   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2977   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2978   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2979   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2980 
2981   B->preallocated = PETSC_TRUE;
2982 
2983   mbs = B->rmap->n/bs;
2984   nbs = B->cmap->n/bs;
2985   bs2 = bs*bs;
2986 
2987   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2988 
2989   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2990   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2991   if (nnz) {
2992     for (i=0; i<mbs; i++) {
2993       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2994       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2995     }
2996   }
2997 
2998   b    = (Mat_SeqBAIJ*)B->data;
2999   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3000   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3001   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3002 
3003   if (!flg) {
3004     switch (bs) {
3005     case 1:
3006       B->ops->mult    = MatMult_SeqBAIJ_1;
3007       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3008       break;
3009     case 2:
3010       B->ops->mult    = MatMult_SeqBAIJ_2;
3011       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3012       break;
3013     case 3:
3014       B->ops->mult    = MatMult_SeqBAIJ_3;
3015       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3016       break;
3017     case 4:
3018       B->ops->mult    = MatMult_SeqBAIJ_4;
3019       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3020       break;
3021     case 5:
3022       B->ops->mult    = MatMult_SeqBAIJ_5;
3023       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3024       break;
3025     case 6:
3026       B->ops->mult    = MatMult_SeqBAIJ_6;
3027       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3028       break;
3029     case 7:
3030       B->ops->mult    = MatMult_SeqBAIJ_7;
3031       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3032       break;
3033     case 15:
3034       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3035       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3036       break;
3037     default:
3038       B->ops->mult    = MatMult_SeqBAIJ_N;
3039       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3040       break;
3041     }
3042   }
3043   B->ops->sor = MatSOR_SeqBAIJ;
3044   b->mbs = mbs;
3045   b->nbs = nbs;
3046   if (!skipallocation) {
3047     if (!b->imax) {
3048       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3049       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3050 
3051       b->free_imax_ilen = PETSC_TRUE;
3052     }
3053     /* b->ilen will count nonzeros in each block row so far. */
3054     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3055     if (!nnz) {
3056       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3057       else if (nz < 0) nz = 1;
3058       for (i=0; i<mbs; i++) b->imax[i] = nz;
3059       nz = nz*mbs;
3060     } else {
3061       nz = 0;
3062       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3063     }
3064 
3065     /* allocate the matrix space */
3066     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3067     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3068     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3069     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3070     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3071 
3072     b->singlemalloc = PETSC_TRUE;
3073     b->i[0]         = 0;
3074     for (i=1; i<mbs+1; i++) {
3075       b->i[i] = b->i[i-1] + b->imax[i-1];
3076     }
3077     b->free_a  = PETSC_TRUE;
3078     b->free_ij = PETSC_TRUE;
3079   } else {
3080     b->free_a  = PETSC_FALSE;
3081     b->free_ij = PETSC_FALSE;
3082   }
3083 
3084   b->bs2              = bs2;
3085   b->mbs              = mbs;
3086   b->nz               = 0;
3087   b->maxnz            = nz;
3088   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3089   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3095 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3096 {
3097   PetscInt       i,m,nz,nz_max=0,*nnz;
3098   PetscScalar    *values=0;
3099   PetscErrorCode ierr;
3100 
3101   PetscFunctionBegin;
3102   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3103   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3104   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3105   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3106   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3107   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3108   m    = B->rmap->n/bs;
3109 
3110   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3111   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3112   for (i=0; i<m; i++) {
3113     nz = ii[i+1]- ii[i];
3114     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3115     nz_max = PetscMax(nz_max, nz);
3116     nnz[i] = nz;
3117   }
3118   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3119   ierr = PetscFree(nnz);CHKERRQ(ierr);
3120 
3121   values = (PetscScalar*)V;
3122   if (!values) {
3123     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3124     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3125   }
3126   for (i=0; i<m; i++) {
3127     PetscInt          ncols  = ii[i+1] - ii[i];
3128     const PetscInt    *icols = jj + ii[i];
3129     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3130     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3131   }
3132   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3133   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3135   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3140 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3141 #if defined(PETSC_HAVE_MUMPS)
3142 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3143 #endif
3144 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3145 
3146 /*MC
3147    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3148    block sparse compressed row format.
3149 
3150    Options Database Keys:
3151 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3152 
3153   Level: beginner
3154 
3155 .seealso: MatCreateSeqBAIJ()
3156 M*/
3157 
3158 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatCreate_SeqBAIJ"
3162 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3163 {
3164   PetscErrorCode ierr;
3165   PetscMPIInt    size;
3166   Mat_SeqBAIJ    *b;
3167 
3168   PetscFunctionBegin;
3169   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3170   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3171 
3172   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3173   B->data = (void*)b;
3174   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3175 
3176   b->row          = 0;
3177   b->col          = 0;
3178   b->icol         = 0;
3179   b->reallocs     = 0;
3180   b->saved_values = 0;
3181 
3182   b->roworiented        = PETSC_TRUE;
3183   b->nonew              = 0;
3184   b->diag               = 0;
3185   b->solve_work         = 0;
3186   b->mult_work          = 0;
3187   B->spptr              = 0;
3188   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3189   b->keepnonzeropattern = PETSC_FALSE;
3190   b->xtoy               = 0;
3191   b->XtoY               = 0;
3192   B->same_nonzero       = PETSC_FALSE;
3193 
3194   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3195   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3196   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3197 #if defined(PETSC_HAVE_MUMPS)
3198   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3199 #endif
3200   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3201   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3202   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3203   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3204   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3205   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3206   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3207   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3208   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3209   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3210   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3216 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3217 {
3218   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3219   PetscErrorCode ierr;
3220   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3221 
3222   PetscFunctionBegin;
3223   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3224 
3225   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3226     c->imax           = a->imax;
3227     c->ilen           = a->ilen;
3228     c->free_imax_ilen = PETSC_FALSE;
3229   } else {
3230     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3231     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3232     for (i=0; i<mbs; i++) {
3233       c->imax[i] = a->imax[i];
3234       c->ilen[i] = a->ilen[i];
3235     }
3236     c->free_imax_ilen = PETSC_TRUE;
3237   }
3238 
3239   /* allocate the matrix space */
3240   if (mallocmatspace) {
3241     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3242       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3243       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3244       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3245 
3246       c->i            = a->i;
3247       c->j            = a->j;
3248       c->singlemalloc = PETSC_FALSE;
3249       c->free_a       = PETSC_TRUE;
3250       c->free_ij      = PETSC_FALSE;
3251       c->parent       = A;
3252       C->preallocated = PETSC_TRUE;
3253       C->assembled    = PETSC_TRUE;
3254 
3255       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3256       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3257       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3258     } else {
3259       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3260       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3261 
3262       c->singlemalloc = PETSC_TRUE;
3263       c->free_a       = PETSC_TRUE;
3264       c->free_ij      = PETSC_TRUE;
3265 
3266       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3267       if (mbs > 0) {
3268         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3269         if (cpvalues == MAT_COPY_VALUES) {
3270           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3271         } else {
3272           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3273         }
3274       }
3275       C->preallocated = PETSC_TRUE;
3276       C->assembled    = PETSC_TRUE;
3277     }
3278   }
3279 
3280   c->roworiented = a->roworiented;
3281   c->nonew       = a->nonew;
3282 
3283   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3284   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3285 
3286   c->bs2         = a->bs2;
3287   c->mbs         = a->mbs;
3288   c->nbs         = a->nbs;
3289 
3290   if (a->diag) {
3291     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3292       c->diag      = a->diag;
3293       c->free_diag = PETSC_FALSE;
3294     } else {
3295       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3296       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3297       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3298       c->free_diag = PETSC_TRUE;
3299     }
3300   } else c->diag = 0;
3301 
3302   c->nz         = a->nz;
3303   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3304   c->solve_work = 0;
3305   c->mult_work  = 0;
3306 
3307   c->compressedrow.use   = a->compressedrow.use;
3308   c->compressedrow.nrows = a->compressedrow.nrows;
3309   c->compressedrow.check = a->compressedrow.check;
3310   if (a->compressedrow.use) {
3311     i    = a->compressedrow.nrows;
3312     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3313     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3314     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3315     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3316   } else {
3317     c->compressedrow.use    = PETSC_FALSE;
3318     c->compressedrow.i      = NULL;
3319     c->compressedrow.rindex = NULL;
3320   }
3321   C->same_nonzero = A->same_nonzero;
3322 
3323   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3324   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3330 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3331 {
3332   PetscErrorCode ierr;
3333 
3334   PetscFunctionBegin;
3335   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3336   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3337   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3338   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3339   PetscFunctionReturn(0);
3340 }
3341 
3342 #undef __FUNCT__
3343 #define __FUNCT__ "MatLoad_SeqBAIJ"
3344 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3345 {
3346   Mat_SeqBAIJ    *a;
3347   PetscErrorCode ierr;
3348   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3349   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3350   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3351   PetscInt       *masked,nmask,tmp,bs2,ishift;
3352   PetscMPIInt    size;
3353   int            fd;
3354   PetscScalar    *aa;
3355   MPI_Comm       comm;
3356 
3357   PetscFunctionBegin;
3358   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3359   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3360   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3361   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3362   bs2  = bs*bs;
3363 
3364   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3365   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3366   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3367   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3368   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3369   M = header[1]; N = header[2]; nz = header[3];
3370 
3371   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3372   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3373 
3374   /*
3375      This code adds extra rows to make sure the number of rows is
3376     divisible by the blocksize
3377   */
3378   mbs        = M/bs;
3379   extra_rows = bs - M + bs*(mbs);
3380   if (extra_rows == bs) extra_rows = 0;
3381   else mbs++;
3382   if (extra_rows) {
3383     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3384   }
3385 
3386   /* Set global sizes if not already set */
3387   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3388     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3389   } else { /* Check if the matrix global sizes are correct */
3390     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3391     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3392       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3393     }
3394     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3395   }
3396 
3397   /* read in row lengths */
3398   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3399   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3400   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3401 
3402   /* read in column indices */
3403   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3404   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3405   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3406 
3407   /* loop over row lengths determining block row lengths */
3408   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3409   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3410   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3411   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3412   rowcount = 0;
3413   nzcount  = 0;
3414   for (i=0; i<mbs; i++) {
3415     nmask = 0;
3416     for (j=0; j<bs; j++) {
3417       kmax = rowlengths[rowcount];
3418       for (k=0; k<kmax; k++) {
3419         tmp = jj[nzcount++]/bs;
3420         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3421       }
3422       rowcount++;
3423     }
3424     browlengths[i] += nmask;
3425     /* zero out the mask elements we set */
3426     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3427   }
3428 
3429   /* Do preallocation  */
3430   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3431   a    = (Mat_SeqBAIJ*)newmat->data;
3432 
3433   /* set matrix "i" values */
3434   a->i[0] = 0;
3435   for (i=1; i<= mbs; i++) {
3436     a->i[i]      = a->i[i-1] + browlengths[i-1];
3437     a->ilen[i-1] = browlengths[i-1];
3438   }
3439   a->nz = 0;
3440   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3441 
3442   /* read in nonzero values */
3443   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3444   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3445   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3446 
3447   /* set "a" and "j" values into matrix */
3448   nzcount = 0; jcount = 0;
3449   for (i=0; i<mbs; i++) {
3450     nzcountb = nzcount;
3451     nmask    = 0;
3452     for (j=0; j<bs; j++) {
3453       kmax = rowlengths[i*bs+j];
3454       for (k=0; k<kmax; k++) {
3455         tmp = jj[nzcount++]/bs;
3456         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3457       }
3458     }
3459     /* sort the masked values */
3460     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3461 
3462     /* set "j" values into matrix */
3463     maskcount = 1;
3464     for (j=0; j<nmask; j++) {
3465       a->j[jcount++]  = masked[j];
3466       mask[masked[j]] = maskcount++;
3467     }
3468     /* set "a" values into matrix */
3469     ishift = bs2*a->i[i];
3470     for (j=0; j<bs; j++) {
3471       kmax = rowlengths[i*bs+j];
3472       for (k=0; k<kmax; k++) {
3473         tmp       = jj[nzcountb]/bs;
3474         block     = mask[tmp] - 1;
3475         point     = jj[nzcountb] - bs*tmp;
3476         idx       = ishift + bs2*block + j + bs*point;
3477         a->a[idx] = (MatScalar)aa[nzcountb++];
3478       }
3479     }
3480     /* zero out the mask elements we set */
3481     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3482   }
3483   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3484 
3485   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3486   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3487   ierr = PetscFree(aa);CHKERRQ(ierr);
3488   ierr = PetscFree(jj);CHKERRQ(ierr);
3489   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3490 
3491   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3492   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatCreateSeqBAIJ"
3498 /*@C
3499    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3500    compressed row) format.  For good matrix assembly performance the
3501    user should preallocate the matrix storage by setting the parameter nz
3502    (or the array nnz).  By setting these parameters accurately, performance
3503    during matrix assembly can be increased by more than a factor of 50.
3504 
3505    Collective on MPI_Comm
3506 
3507    Input Parameters:
3508 +  comm - MPI communicator, set to PETSC_COMM_SELF
3509 .  bs - size of block
3510 .  m - number of rows
3511 .  n - number of columns
3512 .  nz - number of nonzero blocks  per block row (same for all rows)
3513 -  nnz - array containing the number of nonzero blocks in the various block rows
3514          (possibly different for each block row) or NULL
3515 
3516    Output Parameter:
3517 .  A - the matrix
3518 
3519    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3520    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3521    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3522 
3523    Options Database Keys:
3524 .   -mat_no_unroll - uses code that does not unroll the loops in the
3525                      block calculations (much slower)
3526 .    -mat_block_size - size of the blocks to use
3527 
3528    Level: intermediate
3529 
3530    Notes:
3531    The number of rows and columns must be divisible by blocksize.
3532 
3533    If the nnz parameter is given then the nz parameter is ignored
3534 
3535    A nonzero block is any block that as 1 or more nonzeros in it
3536 
3537    The block AIJ format is fully compatible with standard Fortran 77
3538    storage.  That is, the stored row and column indices can begin at
3539    either one (as in Fortran) or zero.  See the users' manual for details.
3540 
3541    Specify the preallocated storage with either nz or nnz (not both).
3542    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3543    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3544    matrices.
3545 
3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3547 @*/
3548 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3549 {
3550   PetscErrorCode ierr;
3551 
3552   PetscFunctionBegin;
3553   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3554   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3555   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3556   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 #undef __FUNCT__
3561 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3562 /*@C
3563    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3564    per row in the matrix. For good matrix assembly performance the
3565    user should preallocate the matrix storage by setting the parameter nz
3566    (or the array nnz).  By setting these parameters accurately, performance
3567    during matrix assembly can be increased by more than a factor of 50.
3568 
3569    Collective on MPI_Comm
3570 
3571    Input Parameters:
3572 +  A - the matrix
3573 .  bs - size of block
3574 .  nz - number of block nonzeros per block row (same for all rows)
3575 -  nnz - array containing the number of block nonzeros in the various block rows
3576          (possibly different for each block row) or NULL
3577 
3578    Options Database Keys:
3579 .   -mat_no_unroll - uses code that does not unroll the loops in the
3580                      block calculations (much slower)
3581 .    -mat_block_size - size of the blocks to use
3582 
3583    Level: intermediate
3584 
3585    Notes:
3586    If the nnz parameter is given then the nz parameter is ignored
3587 
3588    You can call MatGetInfo() to get information on how effective the preallocation was;
3589    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3590    You can also run with the option -info and look for messages with the string
3591    malloc in them to see if additional memory allocation was needed.
3592 
3593    The block AIJ format is fully compatible with standard Fortran 77
3594    storage.  That is, the stored row and column indices can begin at
3595    either one (as in Fortran) or zero.  See the users' manual for details.
3596 
3597    Specify the preallocated storage with either nz or nnz (not both).
3598    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3599    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3600 
3601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3602 @*/
3603 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3604 {
3605   PetscErrorCode ierr;
3606 
3607   PetscFunctionBegin;
3608   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3609   PetscValidType(B,1);
3610   PetscValidLogicalCollectiveInt(B,bs,2);
3611   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3612   PetscFunctionReturn(0);
3613 }
3614 
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3617 /*@C
3618    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3619    (the default sequential PETSc format).
3620 
3621    Collective on MPI_Comm
3622 
3623    Input Parameters:
3624 +  A - the matrix
3625 .  i - the indices into j for the start of each local row (starts with zero)
3626 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3627 -  v - optional values in the matrix
3628 
3629    Level: developer
3630 
3631 .keywords: matrix, aij, compressed row, sparse
3632 
3633 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3634 @*/
3635 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3636 {
3637   PetscErrorCode ierr;
3638 
3639   PetscFunctionBegin;
3640   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3641   PetscValidType(B,1);
3642   PetscValidLogicalCollectiveInt(B,bs,2);
3643   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3644   PetscFunctionReturn(0);
3645 }
3646 
3647 
3648 #undef __FUNCT__
3649 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3650 /*@
3651      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3652 
3653      Collective on MPI_Comm
3654 
3655    Input Parameters:
3656 +  comm - must be an MPI communicator of size 1
3657 .  bs - size of block
3658 .  m - number of rows
3659 .  n - number of columns
3660 .  i - row indices
3661 .  j - column indices
3662 -  a - matrix values
3663 
3664    Output Parameter:
3665 .  mat - the matrix
3666 
3667    Level: advanced
3668 
3669    Notes:
3670        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3671     once the matrix is destroyed
3672 
3673        You cannot set new nonzero locations into this matrix, that will generate an error.
3674 
3675        The i and j indices are 0 based
3676 
3677        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3678 
3679 
3680 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3681 
3682 @*/
3683 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3684 {
3685   PetscErrorCode ierr;
3686   PetscInt       ii;
3687   Mat_SeqBAIJ    *baij;
3688 
3689   PetscFunctionBegin;
3690   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3691   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3692 
3693   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3694   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3695   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3696   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3697   baij = (Mat_SeqBAIJ*)(*mat)->data;
3698   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3699   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3700 
3701   baij->i = i;
3702   baij->j = j;
3703   baij->a = a;
3704 
3705   baij->singlemalloc = PETSC_FALSE;
3706   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3707   baij->free_a       = PETSC_FALSE;
3708   baij->free_ij      = PETSC_FALSE;
3709 
3710   for (ii=0; ii<m; ii++) {
3711     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3712 #if defined(PETSC_USE_DEBUG)
3713     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3714 #endif
3715   }
3716 #if defined(PETSC_USE_DEBUG)
3717   for (ii=0; ii<baij->i[m]; ii++) {
3718     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3719     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3720   }
3721 #endif
3722 
3723   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725   PetscFunctionReturn(0);
3726 }
3727