xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2e92ee13a8395f820cc1e3fd74a7607ed52efa2a)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      zeropivotdetected;
21 
22   PetscFunctionBegin;
23   if (a->idiagvalid) {
24     if (values) *values = a->idiag;
25     PetscFunctionReturn(0);
26   }
27   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
28   diag_offset = a->diag;
29   if (!a->idiag) {
30     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
31     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
32   }
33   diag  = a->idiag;
34   mdiag = a->idiag+bs2*mbs;
35   if (values) *values = a->idiag;
36   /* factor and invert each block */
37   switch (bs) {
38   case 1:
39     for (i=0; i<mbs; i++) {
40       odiag    = v + 1*diag_offset[i];
41       diag[0]  = odiag[0];
42       mdiag[0] = odiag[0];
43       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
44       diag    += 1;
45       mdiag   += 1;
46     }
47     break;
48   case 2:
49     for (i=0; i<mbs; i++) {
50       odiag    = v + 4*diag_offset[i];
51       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
52       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
53       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
54       if (zeropivotdetected) break;
55       diag    += 4;
56       mdiag   += 4;
57     }
58     break;
59   case 3:
60     for (i=0; i<mbs; i++) {
61       odiag    = v + 9*diag_offset[i];
62       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
63       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
64       diag[8]  = odiag[8];
65       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
66       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
67       mdiag[8] = odiag[8];
68       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
69       if (zeropivotdetected) break;
70       diag    += 9;
71       mdiag   += 9;
72     }
73     break;
74   case 4:
75     for (i=0; i<mbs; i++) {
76       odiag  = v + 16*diag_offset[i];
77       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
78       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
79       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
80       if (zeropivotdetected) break;
81       diag  += 16;
82       mdiag += 16;
83     }
84     break;
85   case 5:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 25*diag_offset[i];
88       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
90       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
91       if (zeropivotdetected) break;
92       diag  += 25;
93       mdiag += 25;
94     }
95     break;
96   case 6:
97     for (i=0; i<mbs; i++) {
98       odiag  = v + 36*diag_offset[i];
99       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
101       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
102       if (zeropivotdetected) break;
103       diag  += 36;
104       mdiag += 36;
105     }
106     break;
107   case 7:
108     for (i=0; i<mbs; i++) {
109       odiag  = v + 49*diag_offset[i];
110       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
112       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr);
113       if (zeropivotdetected) break;
114       diag  += 49;
115       mdiag += 49;
116     }
117     break;
118   default:
119     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
120     for (i=0; i<mbs; i++) {
121       odiag  = v + bs2*diag_offset[i];
122       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
123       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
125       diag  += bs2;
126       mdiag += bs2;
127     }
128     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   if (zeropivotdetected) {
132     A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSOR_SeqBAIJ"
139 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
140 {
141   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
142   PetscScalar       *x,*work,*w,*workt,*t;
143   const MatScalar   *v,*aa = a->a, *idiag;
144   const PetscScalar *b,*xb;
145   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
146   PetscErrorCode    ierr;
147   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
148   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
149 
150   PetscFunctionBegin;
151   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
152   its = its*lits;
153   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
154   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
155   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
156   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
157   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
158 
159   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
160 
161   if (!m) PetscFunctionReturn(0);
162   diag  = a->diag;
163   idiag = a->idiag;
164   k    = PetscMax(A->rmap->n,A->cmap->n);
165   if (!a->mult_work) {
166     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
167   }
168   if (!a->sor_workt) {
169     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
170   }
171   if (!a->sor_work) {
172     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
173   }
174   work = a->mult_work;
175   t    = a->sor_workt;
176   w    = a->sor_work;
177 
178   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
179   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
180 
181   if (flag & SOR_ZERO_INITIAL_GUESS) {
182     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
183       switch (bs) {
184       case 1:
185         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
186         t[0] = b[0];
187         i2     = 1;
188         idiag += 1;
189         for (i=1; i<m; i++) {
190           v  = aa + ai[i];
191           vi = aj + ai[i];
192           nz = diag[i] - ai[i];
193           s[0] = b[i2];
194           for (j=0; j<nz; j++) {
195             xw[0] = x[vi[j]];
196             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
197           }
198           t[i2] = s[0];
199           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
200           x[i2]  = xw[0];
201           idiag += 1;
202           i2    += 1;
203         }
204         break;
205       case 2:
206         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
207         t[0] = b[0]; t[1] = b[1];
208         i2     = 2;
209         idiag += 4;
210         for (i=1; i<m; i++) {
211           v  = aa + 4*ai[i];
212           vi = aj + ai[i];
213           nz = diag[i] - ai[i];
214           s[0] = b[i2]; s[1] = b[i2+1];
215           for (j=0; j<nz; j++) {
216             idx = 2*vi[j];
217             it  = 4*j;
218             xw[0] = x[idx]; xw[1] = x[1+idx];
219             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
220           }
221           t[i2] = s[0]; t[i2+1] = s[1];
222           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
223           x[i2]   = xw[0]; x[i2+1] = xw[1];
224           idiag  += 4;
225           i2     += 2;
226         }
227         break;
228       case 3:
229         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
230         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
231         i2     = 3;
232         idiag += 9;
233         for (i=1; i<m; i++) {
234           v  = aa + 9*ai[i];
235           vi = aj + ai[i];
236           nz = diag[i] - ai[i];
237           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
238           while (nz--) {
239             idx = 3*(*vi++);
240             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
241             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
242             v  += 9;
243           }
244           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
245           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
246           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
247           idiag  += 9;
248           i2     += 3;
249         }
250         break;
251       case 4:
252         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
253         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
254         i2     = 4;
255         idiag += 16;
256         for (i=1; i<m; i++) {
257           v  = aa + 16*ai[i];
258           vi = aj + ai[i];
259           nz = diag[i] - ai[i];
260           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
261           while (nz--) {
262             idx = 4*(*vi++);
263             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
264             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
265             v  += 16;
266           }
267           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
268           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
269           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
270           idiag  += 16;
271           i2     += 4;
272         }
273         break;
274       case 5:
275         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
276         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
277         i2     = 5;
278         idiag += 25;
279         for (i=1; i<m; i++) {
280           v  = aa + 25*ai[i];
281           vi = aj + ai[i];
282           nz = diag[i] - ai[i];
283           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
284           while (nz--) {
285             idx = 5*(*vi++);
286             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
287             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
288             v  += 25;
289           }
290           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
291           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
292           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
293           idiag  += 25;
294           i2     += 5;
295         }
296         break;
297       case 6:
298         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
299         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
300         i2     = 6;
301         idiag += 36;
302         for (i=1; i<m; i++) {
303           v  = aa + 36*ai[i];
304           vi = aj + ai[i];
305           nz = diag[i] - ai[i];
306           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
307           while (nz--) {
308             idx = 6*(*vi++);
309             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
310             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
311             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
312             v  += 36;
313           }
314           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
315           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
316           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
317           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
318           idiag  += 36;
319           i2     += 6;
320         }
321         break;
322       case 7:
323         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
324         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
325         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
326         i2     = 7;
327         idiag += 49;
328         for (i=1; i<m; i++) {
329           v  = aa + 49*ai[i];
330           vi = aj + ai[i];
331           nz = diag[i] - ai[i];
332           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
333           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
334           while (nz--) {
335             idx = 7*(*vi++);
336             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
337             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
338             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
339             v  += 49;
340           }
341           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
342           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
343           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
344           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
345           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
346           idiag  += 49;
347           i2     += 7;
348         }
349         break;
350       default:
351         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
352         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
353         i2     = bs;
354         idiag += bs2;
355         for (i=1; i<m; i++) {
356           v  = aa + bs2*ai[i];
357           vi = aj + ai[i];
358           nz = diag[i] - ai[i];
359 
360           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
361           /* copy all rows of x that are needed into contiguous space */
362           workt = work;
363           for (j=0; j<nz; j++) {
364             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
365             workt += bs;
366           }
367           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
368           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
369           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
370 
371           idiag += bs2;
372           i2    += bs;
373         }
374         break;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
378       xb = t;
379     }
380     else xb = b;
381     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
382       idiag = a->idiag+bs2*(a->mbs-1);
383       i2 = bs * (m-1);
384       switch (bs) {
385       case 1:
386         s[0]  = xb[i2];
387         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
388         x[i2] = xw[0];
389         i2   -= 1;
390         for (i=m-2; i>=0; i--) {
391           v  = aa + (diag[i]+1);
392           vi = aj + diag[i] + 1;
393           nz = ai[i+1] - diag[i] - 1;
394           s[0] = xb[i2];
395           for (j=0; j<nz; j++) {
396             xw[0] = x[vi[j]];
397             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
398           }
399           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
400           x[i2]  = xw[0];
401           idiag -= 1;
402           i2    -= 1;
403         }
404         break;
405       case 2:
406         s[0]  = xb[i2]; s[1] = xb[i2+1];
407         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
408         x[i2] = xw[0]; x[i2+1] = xw[1];
409         i2    -= 2;
410         idiag -= 4;
411         for (i=m-2; i>=0; i--) {
412           v  = aa + 4*(diag[i] + 1);
413           vi = aj + diag[i] + 1;
414           nz = ai[i+1] - diag[i] - 1;
415           s[0] = xb[i2]; s[1] = xb[i2+1];
416           for (j=0; j<nz; j++) {
417             idx = 2*vi[j];
418             it  = 4*j;
419             xw[0] = x[idx]; xw[1] = x[1+idx];
420             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
421           }
422           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
423           x[i2]   = xw[0]; x[i2+1] = xw[1];
424           idiag  -= 4;
425           i2     -= 2;
426         }
427         break;
428       case 3:
429         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
430         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
431         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
432         i2    -= 3;
433         idiag -= 9;
434         for (i=m-2; i>=0; i--) {
435           v  = aa + 9*(diag[i]+1);
436           vi = aj + diag[i] + 1;
437           nz = ai[i+1] - diag[i] - 1;
438           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
439           while (nz--) {
440             idx = 3*(*vi++);
441             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
442             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
443             v  += 9;
444           }
445           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
446           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
447           idiag  -= 9;
448           i2     -= 3;
449         }
450         break;
451       case 4:
452         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
453         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
454         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
455         i2    -= 4;
456         idiag -= 16;
457         for (i=m-2; i>=0; i--) {
458           v  = aa + 16*(diag[i]+1);
459           vi = aj + diag[i] + 1;
460           nz = ai[i+1] - diag[i] - 1;
461           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
462           while (nz--) {
463             idx = 4*(*vi++);
464             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
465             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
466             v  += 16;
467           }
468           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
469           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
470           idiag  -= 16;
471           i2     -= 4;
472         }
473         break;
474       case 5:
475         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
476         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
477         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
478         i2    -= 5;
479         idiag -= 25;
480         for (i=m-2; i>=0; i--) {
481           v  = aa + 25*(diag[i]+1);
482           vi = aj + diag[i] + 1;
483           nz = ai[i+1] - diag[i] - 1;
484           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
485           while (nz--) {
486             idx = 5*(*vi++);
487             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
488             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
489             v  += 25;
490           }
491           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
492           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
493           idiag  -= 25;
494           i2     -= 5;
495         }
496         break;
497       case 6:
498         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
499         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
500         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
501         i2    -= 6;
502         idiag -= 36;
503         for (i=m-2; i>=0; i--) {
504           v  = aa + 36*(diag[i]+1);
505           vi = aj + diag[i] + 1;
506           nz = ai[i+1] - diag[i] - 1;
507           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
508           while (nz--) {
509             idx = 6*(*vi++);
510             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
511             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
512             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
513             v  += 36;
514           }
515           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
516           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
517           idiag  -= 36;
518           i2     -= 6;
519         }
520         break;
521       case 7:
522         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
525         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
526         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
527         i2    -= 7;
528         idiag -= 49;
529         for (i=m-2; i>=0; i--) {
530           v  = aa + 49*(diag[i]+1);
531           vi = aj + diag[i] + 1;
532           nz = ai[i+1] - diag[i] - 1;
533           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
534           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
535           while (nz--) {
536             idx = 7*(*vi++);
537             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
538             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
539             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
540             v  += 49;
541           }
542           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
543           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
544           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
545           idiag  -= 49;
546           i2     -= 7;
547         }
548         break;
549       default:
550         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
551         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
552         i2    -= bs;
553         idiag -= bs2;
554         for (i=m-2; i>=0; i--) {
555           v  = aa + bs2*(diag[i]+1);
556           vi = aj + diag[i] + 1;
557           nz = ai[i+1] - diag[i] - 1;
558 
559           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
560           /* copy all rows of x that are needed into contiguous space */
561           workt = work;
562           for (j=0; j<nz; j++) {
563             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
564             workt += bs;
565           }
566           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
567           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
568 
569           idiag -= bs2;
570           i2    -= bs;
571         }
572         break;
573       }
574       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
575     }
576     its--;
577   }
578   while (its--) {
579     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
580       idiag = a->idiag;
581       i2 = 0;
582       switch (bs) {
583       case 1:
584         for (i=0; i<m; i++) {
585           v  = aa + ai[i];
586           vi = aj + ai[i];
587           nz = ai[i+1] - ai[i];
588           s[0] = b[i2];
589           for (j=0; j<nz; j++) {
590             xw[0] = x[vi[j]];
591             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
592           }
593           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
594           x[i2] += xw[0];
595           idiag += 1;
596           i2    += 1;
597         }
598         break;
599       case 2:
600         for (i=0; i<m; i++) {
601           v  = aa + 4*ai[i];
602           vi = aj + ai[i];
603           nz = ai[i+1] - ai[i];
604           s[0] = b[i2]; s[1] = b[i2+1];
605           for (j=0; j<nz; j++) {
606             idx = 2*vi[j];
607             it  = 4*j;
608             xw[0] = x[idx]; xw[1] = x[1+idx];
609             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
610           }
611           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
612           x[i2]  += xw[0]; x[i2+1] += xw[1];
613           idiag  += 4;
614           i2     += 2;
615         }
616         break;
617       case 3:
618         for (i=0; i<m; i++) {
619           v  = aa + 9*ai[i];
620           vi = aj + ai[i];
621           nz = ai[i+1] - ai[i];
622           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
623           while (nz--) {
624             idx = 3*(*vi++);
625             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
626             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
627             v  += 9;
628           }
629           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
630           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
631           idiag  += 9;
632           i2     += 3;
633         }
634         break;
635       case 4:
636         for (i=0; i<m; i++) {
637           v  = aa + 16*ai[i];
638           vi = aj + ai[i];
639           nz = ai[i+1] - ai[i];
640           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
641           while (nz--) {
642             idx = 4*(*vi++);
643             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
644             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
645             v  += 16;
646           }
647           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
648           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
649           idiag  += 16;
650           i2     += 4;
651         }
652         break;
653       case 5:
654         for (i=0; i<m; i++) {
655           v  = aa + 25*ai[i];
656           vi = aj + ai[i];
657           nz = ai[i+1] - ai[i];
658           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
659           while (nz--) {
660             idx = 5*(*vi++);
661             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
662             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
663             v  += 25;
664           }
665           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
666           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
667           idiag  += 25;
668           i2     += 5;
669         }
670         break;
671       case 6:
672         for (i=0; i<m; i++) {
673           v  = aa + 36*ai[i];
674           vi = aj + ai[i];
675           nz = ai[i+1] - ai[i];
676           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
677           while (nz--) {
678             idx = 6*(*vi++);
679             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
680             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
681             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
682             v  += 36;
683           }
684           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
685           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
686           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
687           idiag  += 36;
688           i2     += 6;
689         }
690         break;
691       case 7:
692         for (i=0; i<m; i++) {
693           v  = aa + 49*ai[i];
694           vi = aj + ai[i];
695           nz = ai[i+1] - ai[i];
696           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
697           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
698           while (nz--) {
699             idx = 7*(*vi++);
700             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
701             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
702             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
703             v  += 49;
704           }
705           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
706           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
707           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
708           idiag  += 49;
709           i2     += 7;
710         }
711         break;
712       default:
713         for (i=0; i<m; i++) {
714           v  = aa + bs2*ai[i];
715           vi = aj + ai[i];
716           nz = ai[i+1] - ai[i];
717 
718           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
719           /* copy all rows of x that are needed into contiguous space */
720           workt = work;
721           for (j=0; j<nz; j++) {
722             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
723             workt += bs;
724           }
725           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
726           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
727 
728           idiag += bs2;
729           i2    += bs;
730         }
731         break;
732       }
733       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
734     }
735     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
736       idiag = a->idiag+bs2*(a->mbs-1);
737       i2 = bs * (m-1);
738       switch (bs) {
739       case 1:
740         for (i=m-1; i>=0; i--) {
741           v  = aa + ai[i];
742           vi = aj + ai[i];
743           nz = ai[i+1] - ai[i];
744           s[0] = b[i2];
745           for (j=0; j<nz; j++) {
746             xw[0] = x[vi[j]];
747             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
748           }
749           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
750           x[i2] += xw[0];
751           idiag -= 1;
752           i2    -= 1;
753         }
754         break;
755       case 2:
756         for (i=m-1; i>=0; i--) {
757           v  = aa + 4*ai[i];
758           vi = aj + ai[i];
759           nz = ai[i+1] - ai[i];
760           s[0] = b[i2]; s[1] = b[i2+1];
761           for (j=0; j<nz; j++) {
762             idx = 2*vi[j];
763             it  = 4*j;
764             xw[0] = x[idx]; xw[1] = x[1+idx];
765             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
766           }
767           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
768           x[i2]  += xw[0]; x[i2+1] += xw[1];
769           idiag  -= 4;
770           i2     -= 2;
771         }
772         break;
773       case 3:
774         for (i=m-1; i>=0; i--) {
775           v  = aa + 9*ai[i];
776           vi = aj + ai[i];
777           nz = ai[i+1] - ai[i];
778           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
779           while (nz--) {
780             idx = 3*(*vi++);
781             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
782             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
783             v  += 9;
784           }
785           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
786           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
787           idiag  -= 9;
788           i2     -= 3;
789         }
790         break;
791       case 4:
792         for (i=m-1; i>=0; i--) {
793           v  = aa + 16*ai[i];
794           vi = aj + ai[i];
795           nz = ai[i+1] - ai[i];
796           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
797           while (nz--) {
798             idx = 4*(*vi++);
799             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
800             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
801             v  += 16;
802           }
803           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
804           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
805           idiag  -= 16;
806           i2     -= 4;
807         }
808         break;
809       case 5:
810         for (i=m-1; i>=0; i--) {
811           v  = aa + 25*ai[i];
812           vi = aj + ai[i];
813           nz = ai[i+1] - ai[i];
814           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
815           while (nz--) {
816             idx = 5*(*vi++);
817             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
818             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
819             v  += 25;
820           }
821           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
822           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
823           idiag  -= 25;
824           i2     -= 5;
825         }
826         break;
827       case 6:
828         for (i=m-1; i>=0; i--) {
829           v  = aa + 36*ai[i];
830           vi = aj + ai[i];
831           nz = ai[i+1] - ai[i];
832           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
833           while (nz--) {
834             idx = 6*(*vi++);
835             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
836             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
837             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
838             v  += 36;
839           }
840           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
841           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
842           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
843           idiag  -= 36;
844           i2     -= 6;
845         }
846         break;
847       case 7:
848         for (i=m-1; i>=0; i--) {
849           v  = aa + 49*ai[i];
850           vi = aj + ai[i];
851           nz = ai[i+1] - ai[i];
852           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
853           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
854           while (nz--) {
855             idx = 7*(*vi++);
856             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
857             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
858             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
859             v  += 49;
860           }
861           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
862           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
863           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
864           idiag  -= 49;
865           i2     -= 7;
866         }
867         break;
868       default:
869         for (i=m-1; i>=0; i--) {
870           v  = aa + bs2*ai[i];
871           vi = aj + ai[i];
872           nz = ai[i+1] - ai[i];
873 
874           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
875           /* copy all rows of x that are needed into contiguous space */
876           workt = work;
877           for (j=0; j<nz; j++) {
878             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
879             workt += bs;
880           }
881           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
882           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
883 
884           idiag -= bs2;
885           i2    -= bs;
886         }
887         break;
888       }
889       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
890     }
891   }
892   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
893   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 
898 /*
899     Special version for direct calls from Fortran (Used in PETSc-fun3d)
900 */
901 #if defined(PETSC_HAVE_FORTRAN_CAPS)
902 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
903 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
904 #define matsetvaluesblocked4_ matsetvaluesblocked4
905 #endif
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "matsetvaluesblocked4_"
909 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
910 {
911   Mat               A  = *AA;
912   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
913   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
914   PetscInt          *ai    =a->i,*ailen=a->ilen;
915   PetscInt          *aj    =a->j,stepval,lastcol = -1;
916   const PetscScalar *value = v;
917   MatScalar         *ap,*aa = a->a,*bap;
918 
919   PetscFunctionBegin;
920   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
921   stepval = (n-1)*4;
922   for (k=0; k<m; k++) { /* loop over added rows */
923     row  = im[k];
924     rp   = aj + ai[row];
925     ap   = aa + 16*ai[row];
926     nrow = ailen[row];
927     low  = 0;
928     high = nrow;
929     for (l=0; l<n; l++) { /* loop over added columns */
930       col = in[l];
931       if (col <= lastcol)  low = 0;
932       else                high = nrow;
933       lastcol = col;
934       value   = v + k*(stepval+4 + l)*4;
935       while (high-low > 7) {
936         t = (low+high)/2;
937         if (rp[t] > col) high = t;
938         else             low  = t;
939       }
940       for (i=low; i<high; i++) {
941         if (rp[i] > col) break;
942         if (rp[i] == col) {
943           bap = ap +  16*i;
944           for (ii=0; ii<4; ii++,value+=stepval) {
945             for (jj=ii; jj<16; jj+=4) {
946               bap[jj] += *value++;
947             }
948           }
949           goto noinsert2;
950         }
951       }
952       N = nrow++ - 1;
953       high++; /* added new column index thus must search to one higher than before */
954       /* shift up all the later entries in this row */
955       for (ii=N; ii>=i; ii--) {
956         rp[ii+1] = rp[ii];
957         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
958       }
959       if (N >= i) {
960         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
961       }
962       rp[i] = col;
963       bap   = ap +  16*i;
964       for (ii=0; ii<4; ii++,value+=stepval) {
965         for (jj=ii; jj<16; jj+=4) {
966           bap[jj] = *value++;
967         }
968       }
969       noinsert2:;
970       low = i;
971     }
972     ailen[row] = nrow;
973   }
974   PetscFunctionReturnVoid();
975 }
976 
977 #if defined(PETSC_HAVE_FORTRAN_CAPS)
978 #define matsetvalues4_ MATSETVALUES4
979 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
980 #define matsetvalues4_ matsetvalues4
981 #endif
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatSetValues4_"
985 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
986 {
987   Mat         A  = *AA;
988   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
989   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
990   PetscInt    *ai=a->i,*ailen=a->ilen;
991   PetscInt    *aj=a->j,brow,bcol;
992   PetscInt    ridx,cidx,lastcol = -1;
993   MatScalar   *ap,value,*aa=a->a,*bap;
994 
995   PetscFunctionBegin;
996   for (k=0; k<m; k++) { /* loop over added rows */
997     row  = im[k]; brow = row/4;
998     rp   = aj + ai[brow];
999     ap   = aa + 16*ai[brow];
1000     nrow = ailen[brow];
1001     low  = 0;
1002     high = nrow;
1003     for (l=0; l<n; l++) { /* loop over added columns */
1004       col   = in[l]; bcol = col/4;
1005       ridx  = row % 4; cidx = col % 4;
1006       value = v[l + k*n];
1007       if (col <= lastcol)  low = 0;
1008       else                high = nrow;
1009       lastcol = col;
1010       while (high-low > 7) {
1011         t = (low+high)/2;
1012         if (rp[t] > bcol) high = t;
1013         else              low  = t;
1014       }
1015       for (i=low; i<high; i++) {
1016         if (rp[i] > bcol) break;
1017         if (rp[i] == bcol) {
1018           bap   = ap +  16*i + 4*cidx + ridx;
1019           *bap += value;
1020           goto noinsert1;
1021         }
1022       }
1023       N = nrow++ - 1;
1024       high++; /* added new column thus must search to one higher than before */
1025       /* shift up all the later entries in this row */
1026       for (ii=N; ii>=i; ii--) {
1027         rp[ii+1] = rp[ii];
1028         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1029       }
1030       if (N>=i) {
1031         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1032       }
1033       rp[i]                    = bcol;
1034       ap[16*i + 4*cidx + ridx] = value;
1035 noinsert1:;
1036       low = i;
1037     }
1038     ailen[brow] = nrow;
1039   }
1040   PetscFunctionReturnVoid();
1041 }
1042 
1043 /*
1044      Checks for missing diagonals
1045 */
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1048 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1049 {
1050   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052   PetscInt       *diag,*ii = a->i,i;
1053 
1054   PetscFunctionBegin;
1055   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1056   *missing = PETSC_FALSE;
1057   if (A->rmap->n > 0 && !ii) {
1058     *missing = PETSC_TRUE;
1059     if (d) *d = 0;
1060     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1061   } else {
1062     diag = a->diag;
1063     for (i=0; i<a->mbs; i++) {
1064       if (diag[i] >= ii[i+1]) {
1065         *missing = PETSC_TRUE;
1066         if (d) *d = i;
1067         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1068         break;
1069       }
1070     }
1071   }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1077 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1078 {
1079   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1080   PetscErrorCode ierr;
1081   PetscInt       i,j,m = a->mbs;
1082 
1083   PetscFunctionBegin;
1084   if (!a->diag) {
1085     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1086     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1087     a->free_diag = PETSC_TRUE;
1088   }
1089   for (i=0; i<m; i++) {
1090     a->diag[i] = a->i[i+1];
1091     for (j=a->i[i]; j<a->i[i+1]; j++) {
1092       if (a->j[j] == i) {
1093         a->diag[i] = j;
1094         break;
1095       }
1096     }
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1110 
1111   PetscFunctionBegin;
1112   *nn = n;
1113   if (!ia) PetscFunctionReturn(0);
1114   if (symmetric) {
1115     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1116     nz   = tia[n];
1117   } else {
1118     tia = a->i; tja = a->j;
1119   }
1120 
1121   if (!blockcompressed && bs > 1) {
1122     (*nn) *= bs;
1123     /* malloc & create the natural set of indices */
1124     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1125     if (n) {
1126       (*ia)[0] = 0;
1127       for (j=1; j<bs; j++) {
1128         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129       }
1130     }
1131 
1132     for (i=1; i<n; i++) {
1133       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134       for (j=1; j<bs; j++) {
1135         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136       }
1137     }
1138     if (n) {
1139       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140     }
1141 
1142     if (inja) {
1143       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1144       cnt = 0;
1145       for (i=0; i<n; i++) {
1146         for (j=0; j<bs; j++) {
1147           for (k=tia[i]; k<tia[i+1]; k++) {
1148             for (l=0; l<bs; l++) {
1149               (*ja)[cnt++] = bs*tja[k] + l;
1150             }
1151           }
1152         }
1153       }
1154     }
1155 
1156     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157       ierr = PetscFree(tia);CHKERRQ(ierr);
1158       ierr = PetscFree(tja);CHKERRQ(ierr);
1159     }
1160   } else if (oshift == 1) {
1161     if (symmetric) {
1162       nz = tia[A->rmap->n/bs];
1163       /*  add 1 to i and j indices */
1164       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165       *ia = tia;
1166       if (ja) {
1167         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168         *ja = tja;
1169       }
1170     } else {
1171       nz = a->i[A->rmap->n/bs];
1172       /* malloc space and  add 1 to i and j indices */
1173       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1174       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175       if (ja) {
1176         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1177         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178       }
1179     }
1180   } else {
1181     *ia = tia;
1182     if (ja) *ja = tja;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1189 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   if (!ia) PetscFunctionReturn(0);
1195   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1196     ierr = PetscFree(*ia);CHKERRQ(ierr);
1197     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1198   }
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1204 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1205 {
1206   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210 #if defined(PETSC_USE_LOG)
1211   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1212 #endif
1213   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1214   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1215   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1216   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1217   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1218   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1219   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1220   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1221   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1222   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1224   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1225   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1226 
1227   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1228   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1229   ierr = PetscFree(A->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1247 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1248 {
1249   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   switch (op) {
1254   case MAT_ROW_ORIENTED:
1255     a->roworiented = flg;
1256     break;
1257   case MAT_KEEP_NONZERO_PATTERN:
1258     a->keepnonzeropattern = flg;
1259     break;
1260   case MAT_NEW_NONZERO_LOCATIONS:
1261     a->nonew = (flg ? 0 : 1);
1262     break;
1263   case MAT_NEW_NONZERO_LOCATION_ERR:
1264     a->nonew = (flg ? -1 : 0);
1265     break;
1266   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1267     a->nonew = (flg ? -2 : 0);
1268     break;
1269   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1270     a->nounused = (flg ? -1 : 0);
1271     break;
1272   case MAT_NEW_DIAGONALS:
1273   case MAT_IGNORE_OFF_PROC_ENTRIES:
1274   case MAT_USE_HASH_TABLE:
1275     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1276     break;
1277   case MAT_SPD:
1278   case MAT_SYMMETRIC:
1279   case MAT_STRUCTURALLY_SYMMETRIC:
1280   case MAT_HERMITIAN:
1281   case MAT_SYMMETRY_ETERNAL:
1282     /* These options are handled directly by MatSetOption() */
1283     break;
1284   default:
1285     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1293 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1294 {
1295   PetscErrorCode ierr;
1296   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1297   MatScalar      *aa_i;
1298   PetscScalar    *v_i;
1299 
1300   PetscFunctionBegin;
1301   bs  = A->rmap->bs;
1302   bs2 = bs*bs;
1303   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1304 
1305   bn  = row/bs;   /* Block number */
1306   bp  = row % bs; /* Block Position */
1307   M   = ai[bn+1] - ai[bn];
1308   *nz = bs*M;
1309 
1310   if (v) {
1311     *v = 0;
1312     if (*nz) {
1313       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1314       for (i=0; i<M; i++) { /* for each block in the block row */
1315         v_i  = *v + i*bs;
1316         aa_i = aa + bs2*(ai[bn] + i);
1317         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1318       }
1319     }
1320   }
1321 
1322   if (idx) {
1323     *idx = 0;
1324     if (*nz) {
1325       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1326       for (i=0; i<M; i++) { /* for each block in the block row */
1327         idx_i = *idx + i*bs;
1328         itmp  = bs*aj[ai[bn] + i];
1329         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1330       }
1331     }
1332   }
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1338 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1339 {
1340   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1350 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1351 {
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1356   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1364 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1365 {
1366   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1367   Mat            C;
1368   PetscErrorCode ierr;
1369   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1370   PetscInt       *rows,*cols,bs2=a->bs2;
1371   MatScalar      *array;
1372 
1373   PetscFunctionBegin;
1374   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1375   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1376     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1377 
1378     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1379     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1380     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1381     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1382     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1383     ierr = PetscFree(col);CHKERRQ(ierr);
1384   } else {
1385     C = *B;
1386   }
1387 
1388   array = a->a;
1389   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1390   for (i=0; i<mbs; i++) {
1391     cols[0] = i*bs;
1392     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1393     len = ai[i+1] - ai[i];
1394     for (j=0; j<len; j++) {
1395       rows[0] = (*aj++)*bs;
1396       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1397       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1398       array += bs2;
1399     }
1400   }
1401   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1402 
1403   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1404   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405 
1406   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1407     *B = C;
1408   } else {
1409     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1416 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1417 {
1418   PetscErrorCode ierr;
1419   Mat            Btrans;
1420 
1421   PetscFunctionBegin;
1422   *f   = PETSC_FALSE;
1423   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1424   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1425   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1431 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1432 {
1433   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1436   int            fd;
1437   PetscScalar    *aa;
1438   FILE           *file;
1439 
1440   PetscFunctionBegin;
1441   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1442   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1443   col_lens[0] = MAT_FILE_CLASSID;
1444 
1445   col_lens[1] = A->rmap->N;
1446   col_lens[2] = A->cmap->n;
1447   col_lens[3] = a->nz*bs2;
1448 
1449   /* store lengths of each row and write (including header) to file */
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1457   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1458 
1459   /* store column indices (zero start index) */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           jj[count++] = bs*a->j[k] + l;
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(jj);CHKERRQ(ierr);
1473 
1474   /* store nonzero values */
1475   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1476   count = 0;
1477   for (i=0; i<a->mbs; i++) {
1478     for (j=0; j<bs; j++) {
1479       for (k=a->i[i]; k<a->i[i+1]; k++) {
1480         for (l=0; l<bs; l++) {
1481           aa[count++] = a->a[bs2*k + l*bs + j];
1482         }
1483       }
1484     }
1485   }
1486   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1487   ierr = PetscFree(aa);CHKERRQ(ierr);
1488 
1489   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1490   if (file) {
1491     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1498 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1499 {
1500   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode    ierr;
1502   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1503   PetscViewerFormat format;
1504 
1505   PetscFunctionBegin;
1506   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1507   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1508     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1509   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1510     const char *matname;
1511     Mat        aij;
1512     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1513     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1514     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1515     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1516     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1518       PetscFunctionReturn(0);
1519   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1520     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1521     for (i=0; i<a->mbs; i++) {
1522       for (j=0; j<bs; j++) {
1523         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1524         for (k=a->i[i]; k<a->i[i+1]; k++) {
1525           for (l=0; l<bs; l++) {
1526 #if defined(PETSC_USE_COMPLEX)
1527             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1529                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1530             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1532                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             }
1536 #else
1537             if (a->a[bs2*k + l*bs + j] != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1539             }
1540 #endif
1541           }
1542         }
1543         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1544       }
1545     }
1546     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1547   } else {
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1549     for (i=0; i<a->mbs; i++) {
1550       for (j=0; j<bs; j++) {
1551         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1552         for (k=a->i[i]; k<a->i[i+1]; k++) {
1553           for (l=0; l<bs; l++) {
1554 #if defined(PETSC_USE_COMPLEX)
1555             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1556               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1557                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1558             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1560                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             }
1564 #else
1565             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1566 #endif
1567           }
1568         }
1569         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1570       }
1571     }
1572     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1573   }
1574   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #include <petscdraw.h>
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582 {
1583   Mat               A = (Mat) Aa;
1584   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1585   PetscErrorCode    ierr;
1586   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1587   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588   MatScalar         *aa;
1589   PetscViewer       viewer;
1590   PetscViewerFormat format;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1594   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1595 
1596   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1597 
1598   /* loop over matrix elements drawing boxes */
1599 
1600   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1601     color = PETSC_DRAW_BLUE;
1602     for (i=0,row=0; i<mbs; i++,row+=bs) {
1603       for (j=a->i[i]; j<a->i[i+1]; j++) {
1604         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606         aa  = a->a + j*bs2;
1607         for (k=0; k<bs; k++) {
1608           for (l=0; l<bs; l++) {
1609             if (PetscRealPart(*aa++) >=  0.) continue;
1610             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611           }
1612         }
1613       }
1614     }
1615     color = PETSC_DRAW_CYAN;
1616     for (i=0,row=0; i<mbs; i++,row+=bs) {
1617       for (j=a->i[i]; j<a->i[i+1]; j++) {
1618         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620         aa  = a->a + j*bs2;
1621         for (k=0; k<bs; k++) {
1622           for (l=0; l<bs; l++) {
1623             if (PetscRealPart(*aa++) != 0.) continue;
1624             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625           }
1626         }
1627       }
1628     }
1629     color = PETSC_DRAW_RED;
1630     for (i=0,row=0; i<mbs; i++,row+=bs) {
1631       for (j=a->i[i]; j<a->i[i+1]; j++) {
1632         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1633         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1634         aa  = a->a + j*bs2;
1635         for (k=0; k<bs; k++) {
1636           for (l=0; l<bs; l++) {
1637             if (PetscRealPart(*aa++) <= 0.) continue;
1638             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1639           }
1640         }
1641       }
1642     }
1643   } else {
1644     /* use contour shading to indicate magnitude of values */
1645     /* first determine max of all nonzero values */
1646     PetscDraw popup;
1647     PetscReal scale,maxv = 0.0;
1648 
1649     for (i=0; i<a->nz*a->bs2; i++) {
1650       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1651     }
1652     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1653     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1654     if (popup) {
1655       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1656     }
1657     for (i=0,row=0; i<mbs; i++,row+=bs) {
1658       for (j=a->i[i]; j<a->i[i+1]; j++) {
1659         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1660         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1661         aa  = a->a + j*bs2;
1662         for (k=0; k<bs; k++) {
1663           for (l=0; l<bs; l++) {
1664             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1665             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1666           }
1667         }
1668       }
1669     }
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1676 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1677 {
1678   PetscErrorCode ierr;
1679   PetscReal      xl,yl,xr,yr,w,h;
1680   PetscDraw      draw;
1681   PetscBool      isnull;
1682 
1683   PetscFunctionBegin;
1684   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1685   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1686 
1687   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1688   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1689   xr  += w;    yr += h;  xl = -w;     yl = -h;
1690   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1691   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1692   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatView_SeqBAIJ"
1698 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1699 {
1700   PetscErrorCode ierr;
1701   PetscBool      iascii,isbinary,isdraw;
1702 
1703   PetscFunctionBegin;
1704   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1705   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1707   if (iascii) {
1708     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1709   } else if (isbinary) {
1710     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1711   } else if (isdraw) {
1712     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1713   } else {
1714     Mat B;
1715     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1716     ierr = MatView(B,viewer);CHKERRQ(ierr);
1717     ierr = MatDestroy(&B);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1726 {
1727   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1728   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1729   PetscInt    *ai = a->i,*ailen = a->ilen;
1730   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1731   MatScalar   *ap,*aa = a->a;
1732 
1733   PetscFunctionBegin;
1734   for (k=0; k<m; k++) { /* loop over rows */
1735     row = im[k]; brow = row/bs;
1736     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1737     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1738     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1739     nrow = ailen[brow];
1740     for (l=0; l<n; l++) { /* loop over columns */
1741       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1742       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1743       col  = in[l];
1744       bcol = col/bs;
1745       cidx = col%bs;
1746       ridx = row%bs;
1747       high = nrow;
1748       low  = 0; /* assume unsorted */
1749       while (high-low > 5) {
1750         t = (low+high)/2;
1751         if (rp[t] > bcol) high = t;
1752         else             low  = t;
1753       }
1754       for (i=low; i<high; i++) {
1755         if (rp[i] > bcol) break;
1756         if (rp[i] == bcol) {
1757           *v++ = ap[bs2*i+bs*cidx+ridx];
1758           goto finished;
1759         }
1760       }
1761       *v++ = 0.0;
1762 finished:;
1763     }
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1770 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1771 {
1772   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1773   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1774   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1775   PetscErrorCode    ierr;
1776   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1777   PetscBool         roworiented=a->roworiented;
1778   const PetscScalar *value     = v;
1779   MatScalar         *ap,*aa = a->a,*bap;
1780 
1781   PetscFunctionBegin;
1782   if (roworiented) {
1783     stepval = (n-1)*bs;
1784   } else {
1785     stepval = (m-1)*bs;
1786   }
1787   for (k=0; k<m; k++) { /* loop over added rows */
1788     row = im[k];
1789     if (row < 0) continue;
1790 #if defined(PETSC_USE_DEBUG)
1791     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1792 #endif
1793     rp   = aj + ai[row];
1794     ap   = aa + bs2*ai[row];
1795     rmax = imax[row];
1796     nrow = ailen[row];
1797     low  = 0;
1798     high = nrow;
1799     for (l=0; l<n; l++) { /* loop over added columns */
1800       if (in[l] < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1803 #endif
1804       col = in[l];
1805       if (roworiented) {
1806         value = v + (k*(stepval+bs) + l)*bs;
1807       } else {
1808         value = v + (l*(stepval+bs) + k)*bs;
1809       }
1810       if (col <= lastcol) low = 0;
1811       else high = nrow;
1812       lastcol = col;
1813       while (high-low > 7) {
1814         t = (low+high)/2;
1815         if (rp[t] > col) high = t;
1816         else             low  = t;
1817       }
1818       for (i=low; i<high; i++) {
1819         if (rp[i] > col) break;
1820         if (rp[i] == col) {
1821           bap = ap +  bs2*i;
1822           if (roworiented) {
1823             if (is == ADD_VALUES) {
1824               for (ii=0; ii<bs; ii++,value+=stepval) {
1825                 for (jj=ii; jj<bs2; jj+=bs) {
1826                   bap[jj] += *value++;
1827                 }
1828               }
1829             } else {
1830               for (ii=0; ii<bs; ii++,value+=stepval) {
1831                 for (jj=ii; jj<bs2; jj+=bs) {
1832                   bap[jj] = *value++;
1833                 }
1834               }
1835             }
1836           } else {
1837             if (is == ADD_VALUES) {
1838               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1839                 for (jj=0; jj<bs; jj++) {
1840                   bap[jj] += value[jj];
1841                 }
1842                 bap += bs;
1843               }
1844             } else {
1845               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1846                 for (jj=0; jj<bs; jj++) {
1847                   bap[jj]  = value[jj];
1848                 }
1849                 bap += bs;
1850               }
1851             }
1852           }
1853           goto noinsert2;
1854         }
1855       }
1856       if (nonew == 1) goto noinsert2;
1857       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1858       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1859       N = nrow++ - 1; high++;
1860       /* shift up all the later entries in this row */
1861       for (ii=N; ii>=i; ii--) {
1862         rp[ii+1] = rp[ii];
1863         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1864       }
1865       if (N >= i) {
1866         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1867       }
1868       rp[i] = col;
1869       bap   = ap +  bs2*i;
1870       if (roworiented) {
1871         for (ii=0; ii<bs; ii++,value+=stepval) {
1872           for (jj=ii; jj<bs2; jj+=bs) {
1873             bap[jj] = *value++;
1874           }
1875         }
1876       } else {
1877         for (ii=0; ii<bs; ii++,value+=stepval) {
1878           for (jj=0; jj<bs; jj++) {
1879             *bap++ = *value++;
1880           }
1881         }
1882       }
1883 noinsert2:;
1884       low = i;
1885     }
1886     ailen[row] = nrow;
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1894 {
1895   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1896   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1897   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1898   PetscErrorCode ierr;
1899   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1900   MatScalar      *aa  = a->a,*ap;
1901   PetscReal      ratio=0.6;
1902 
1903   PetscFunctionBegin;
1904   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1905 
1906   if (m) rmax = ailen[0];
1907   for (i=1; i<mbs; i++) {
1908     /* move each row back by the amount of empty slots (fshift) before it*/
1909     fshift += imax[i-1] - ailen[i-1];
1910     rmax    = PetscMax(rmax,ailen[i]);
1911     if (fshift) {
1912       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1913       N  = ailen[i];
1914       for (j=0; j<N; j++) {
1915         ip[j-fshift] = ip[j];
1916 
1917         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1918       }
1919     }
1920     ai[i] = ai[i-1] + ailen[i-1];
1921   }
1922   if (mbs) {
1923     fshift += imax[mbs-1] - ailen[mbs-1];
1924     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1925   }
1926 
1927   /* reset ilen and imax for each row */
1928   a->nonzerorowcnt = 0;
1929   for (i=0; i<mbs; i++) {
1930     ailen[i] = imax[i] = ai[i+1] - ai[i];
1931     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1932   }
1933   a->nz = ai[mbs];
1934 
1935   /* diagonals may have moved, so kill the diagonal pointers */
1936   a->idiagvalid = PETSC_FALSE;
1937   if (fshift && a->diag) {
1938     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1939     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1940     a->diag = 0;
1941   }
1942   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1943   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1945   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1946 
1947   A->info.mallocs    += a->reallocs;
1948   a->reallocs         = 0;
1949   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1950   a->rmax             = rmax;
1951 
1952   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 /*
1957    This function returns an array of flags which indicate the locations of contiguous
1958    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1959    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1960    Assume: sizes should be long enough to hold all the values.
1961 */
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1964 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1965 {
1966   PetscInt  i,j,k,row;
1967   PetscBool flg;
1968 
1969   PetscFunctionBegin;
1970   for (i=0,j=0; i<n; j++) {
1971     row = idx[i];
1972     if (row%bs!=0) { /* Not the begining of a block */
1973       sizes[j] = 1;
1974       i++;
1975     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1976       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1977       i++;
1978     } else { /* Begining of the block, so check if the complete block exists */
1979       flg = PETSC_TRUE;
1980       for (k=1; k<bs; k++) {
1981         if (row+k != idx[i+k]) { /* break in the block */
1982           flg = PETSC_FALSE;
1983           break;
1984         }
1985       }
1986       if (flg) { /* No break in the bs */
1987         sizes[j] = bs;
1988         i       += bs;
1989       } else {
1990         sizes[j] = 1;
1991         i++;
1992       }
1993     }
1994   }
1995   *bs_max = j;
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2001 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2002 {
2003   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2004   PetscErrorCode    ierr;
2005   PetscInt          i,j,k,count,*rows;
2006   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2007   PetscScalar       zero = 0.0;
2008   MatScalar         *aa;
2009   const PetscScalar *xx;
2010   PetscScalar       *bb;
2011 
2012   PetscFunctionBegin;
2013   /* fix right hand side if needed */
2014   if (x && b) {
2015     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2016     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2017     for (i=0; i<is_n; i++) {
2018       bb[is_idx[i]] = diag*xx[is_idx[i]];
2019     }
2020     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2021     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2022   }
2023 
2024   /* Make a copy of the IS and  sort it */
2025   /* allocate memory for rows,sizes */
2026   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2027 
2028   /* copy IS values to rows, and sort them */
2029   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2030   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2031 
2032   if (baij->keepnonzeropattern) {
2033     for (i=0; i<is_n; i++) sizes[i] = 1;
2034     bs_max          = is_n;
2035   } else {
2036     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2037     A->nonzerostate++;
2038   }
2039 
2040   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2041     row = rows[j];
2042     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2043     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2044     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2045     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2046       if (diag != (PetscScalar)0.0) {
2047         if (baij->ilen[row/bs] > 0) {
2048           baij->ilen[row/bs]       = 1;
2049           baij->j[baij->i[row/bs]] = row/bs;
2050 
2051           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2052         }
2053         /* Now insert all the diagonal values for this bs */
2054         for (k=0; k<bs; k++) {
2055           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2056         }
2057       } else { /* (diag == 0.0) */
2058         baij->ilen[row/bs] = 0;
2059       } /* end (diag == 0.0) */
2060     } else { /* (sizes[i] != bs) */
2061 #if defined(PETSC_USE_DEBUG)
2062       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2063 #endif
2064       for (k=0; k<count; k++) {
2065         aa[0] =  zero;
2066         aa   += bs;
2067       }
2068       if (diag != (PetscScalar)0.0) {
2069         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2070       }
2071     }
2072   }
2073 
2074   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2082 {
2083   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2084   PetscErrorCode    ierr;
2085   PetscInt          i,j,k,count;
2086   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2087   PetscScalar       zero = 0.0;
2088   MatScalar         *aa;
2089   const PetscScalar *xx;
2090   PetscScalar       *bb;
2091   PetscBool         *zeroed,vecs = PETSC_FALSE;
2092 
2093   PetscFunctionBegin;
2094   /* fix right hand side if needed */
2095   if (x && b) {
2096     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2097     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2098     vecs = PETSC_TRUE;
2099   }
2100 
2101   /* zero the columns */
2102   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2103   for (i=0; i<is_n; i++) {
2104     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2105     zeroed[is_idx[i]] = PETSC_TRUE;
2106   }
2107   for (i=0; i<A->rmap->N; i++) {
2108     if (!zeroed[i]) {
2109       row = i/bs;
2110       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2111         for (k=0; k<bs; k++) {
2112           col = bs*baij->j[j] + k;
2113           if (zeroed[col]) {
2114             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2115             if (vecs) bb[i] -= aa[0]*xx[col];
2116             aa[0] = 0.0;
2117           }
2118         }
2119       }
2120     } else if (vecs) bb[i] = diag*xx[i];
2121   }
2122   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2123   if (vecs) {
2124     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2125     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2126   }
2127 
2128   /* zero the rows */
2129   for (i=0; i<is_n; i++) {
2130     row   = is_idx[i];
2131     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2132     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2133     for (k=0; k<count; k++) {
2134       aa[0] =  zero;
2135       aa   += bs;
2136     }
2137     if (diag != (PetscScalar)0.0) {
2138       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2139     }
2140   }
2141   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2147 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2148 {
2149   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2150   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2151   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2152   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2153   PetscErrorCode ierr;
2154   PetscInt       ridx,cidx,bs2=a->bs2;
2155   PetscBool      roworiented=a->roworiented;
2156   MatScalar      *ap,value,*aa=a->a,*bap;
2157 
2158   PetscFunctionBegin;
2159   for (k=0; k<m; k++) { /* loop over added rows */
2160     row  = im[k];
2161     brow = row/bs;
2162     if (row < 0) continue;
2163 #if defined(PETSC_USE_DEBUG)
2164     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2165 #endif
2166     rp   = aj + ai[brow];
2167     ap   = aa + bs2*ai[brow];
2168     rmax = imax[brow];
2169     nrow = ailen[brow];
2170     low  = 0;
2171     high = nrow;
2172     for (l=0; l<n; l++) { /* loop over added columns */
2173       if (in[l] < 0) continue;
2174 #if defined(PETSC_USE_DEBUG)
2175       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2176 #endif
2177       col  = in[l]; bcol = col/bs;
2178       ridx = row % bs; cidx = col % bs;
2179       if (roworiented) {
2180         value = v[l + k*n];
2181       } else {
2182         value = v[k + l*m];
2183       }
2184       if (col <= lastcol) low = 0; else high = nrow;
2185       lastcol = col;
2186       while (high-low > 7) {
2187         t = (low+high)/2;
2188         if (rp[t] > bcol) high = t;
2189         else              low  = t;
2190       }
2191       for (i=low; i<high; i++) {
2192         if (rp[i] > bcol) break;
2193         if (rp[i] == bcol) {
2194           bap = ap +  bs2*i + bs*cidx + ridx;
2195           if (is == ADD_VALUES) *bap += value;
2196           else                  *bap  = value;
2197           goto noinsert1;
2198         }
2199       }
2200       if (nonew == 1) goto noinsert1;
2201       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2202       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2203       N = nrow++ - 1; high++;
2204       /* shift up all the later entries in this row */
2205       for (ii=N; ii>=i; ii--) {
2206         rp[ii+1] = rp[ii];
2207         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2208       }
2209       if (N>=i) {
2210         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2211       }
2212       rp[i]                      = bcol;
2213       ap[bs2*i + bs*cidx + ridx] = value;
2214       a->nz++;
2215       A->nonzerostate++;
2216 noinsert1:;
2217       low = i;
2218     }
2219     ailen[brow] = nrow;
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2226 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2227 {
2228   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2229   Mat            outA;
2230   PetscErrorCode ierr;
2231   PetscBool      row_identity,col_identity;
2232 
2233   PetscFunctionBegin;
2234   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2235   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2236   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2237   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2238 
2239   outA            = inA;
2240   inA->factortype = MAT_FACTOR_LU;
2241 
2242   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2243 
2244   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2245   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2246   a->row = row;
2247   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2248   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2249   a->col = col;
2250 
2251   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2252   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2253   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2254   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2255 
2256   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2257   if (!a->solve_work) {
2258     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2259     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2260   }
2261   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2267 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2268 {
2269   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2270   PetscInt    i,nz,mbs;
2271 
2272   PetscFunctionBegin;
2273   nz  = baij->maxnz;
2274   mbs = baij->mbs;
2275   for (i=0; i<nz; i++) {
2276     baij->j[i] = indices[i];
2277   }
2278   baij->nz = nz;
2279   for (i=0; i<mbs; i++) {
2280     baij->ilen[i] = baij->imax[i];
2281   }
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2287 /*@
2288     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2289        in the matrix.
2290 
2291   Input Parameters:
2292 +  mat - the SeqBAIJ matrix
2293 -  indices - the column indices
2294 
2295   Level: advanced
2296 
2297   Notes:
2298     This can be called if you have precomputed the nonzero structure of the
2299   matrix and want to provide it to the matrix object to improve the performance
2300   of the MatSetValues() operation.
2301 
2302     You MUST have set the correct numbers of nonzeros per row in the call to
2303   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2304 
2305     MUST be called before any calls to MatSetValues();
2306 
2307 @*/
2308 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2314   PetscValidPointer(indices,2);
2315   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2321 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2322 {
2323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2324   PetscErrorCode ierr;
2325   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2326   PetscReal      atmp;
2327   PetscScalar    *x,zero = 0.0;
2328   MatScalar      *aa;
2329   PetscInt       ncols,brow,krow,kcol;
2330 
2331   PetscFunctionBegin;
2332   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2333   bs  = A->rmap->bs;
2334   aa  = a->a;
2335   ai  = a->i;
2336   aj  = a->j;
2337   mbs = a->mbs;
2338 
2339   ierr = VecSet(v,zero);CHKERRQ(ierr);
2340   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2341   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2342   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2343   for (i=0; i<mbs; i++) {
2344     ncols = ai[1] - ai[0]; ai++;
2345     brow  = bs*i;
2346     for (j=0; j<ncols; j++) {
2347       for (kcol=0; kcol<bs; kcol++) {
2348         for (krow=0; krow<bs; krow++) {
2349           atmp = PetscAbsScalar(*aa);aa++;
2350           row  = brow + krow;   /* row index */
2351           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2352         }
2353       }
2354       aj++;
2355     }
2356   }
2357   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatCopy_SeqBAIJ"
2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2364 {
2365   PetscErrorCode ierr;
2366 
2367   PetscFunctionBegin;
2368   /* If the two matrices have the same copy implementation, use fast copy. */
2369   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2370     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2371     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2372     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2373 
2374     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2375     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2376     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2377   } else {
2378     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2385 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2386 {
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2396 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2397 {
2398   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2399 
2400   PetscFunctionBegin;
2401   *array = a->a;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2407 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2408 {
2409   PetscFunctionBegin;
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2415 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2416 {
2417   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2418   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2419   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2420   PetscErrorCode ierr;
2421 
2422   PetscFunctionBegin;
2423   /* Set the number of nonzeros in the new matrix */
2424   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2430 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2431 {
2432   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2433   PetscErrorCode ierr;
2434   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2435   PetscBLASInt   one=1;
2436 
2437   PetscFunctionBegin;
2438   if (str == SAME_NONZERO_PATTERN) {
2439     PetscScalar  alpha = a;
2440     PetscBLASInt bnz;
2441     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2442     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2443     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2444   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2445     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2446   } else {
2447     Mat      B;
2448     PetscInt *nnz;
2449     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2450     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2451     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2452     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2453     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2454     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2455     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2456     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2457     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2458     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2459     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2460     ierr = PetscFree(nnz);CHKERRQ(ierr);
2461   }
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2467 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2468 {
2469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2471   MatScalar   *aa = a->a;
2472 
2473   PetscFunctionBegin;
2474   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2480 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2481 {
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2483   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2484   MatScalar   *aa = a->a;
2485 
2486   PetscFunctionBegin;
2487   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2493 /*
2494     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2495 */
2496 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2497 {
2498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2499   PetscErrorCode ierr;
2500   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2501   PetscInt       nz = a->i[m],row,*jj,mr,col;
2502 
2503   PetscFunctionBegin;
2504   *nn = n;
2505   if (!ia) PetscFunctionReturn(0);
2506   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2507   else {
2508     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2509     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2510     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2511     jj   = a->j;
2512     for (i=0; i<nz; i++) {
2513       collengths[jj[i]]++;
2514     }
2515     cia[0] = oshift;
2516     for (i=0; i<n; i++) {
2517       cia[i+1] = cia[i] + collengths[i];
2518     }
2519     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2520     jj   = a->j;
2521     for (row=0; row<m; row++) {
2522       mr = a->i[row+1] - a->i[row];
2523       for (i=0; i<mr; i++) {
2524         col = *jj++;
2525 
2526         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2527       }
2528     }
2529     ierr = PetscFree(collengths);CHKERRQ(ierr);
2530     *ia  = cia; *ja = cja;
2531   }
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2538 {
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   if (!ia) PetscFunctionReturn(0);
2543   ierr = PetscFree(*ia);CHKERRQ(ierr);
2544   ierr = PetscFree(*ja);CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*
2549  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2550  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2551  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2552  */
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2555 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2556 {
2557   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2558   PetscErrorCode ierr;
2559   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2560   PetscInt       nz = a->i[m],row,*jj,mr,col;
2561   PetscInt       *cspidx;
2562 
2563   PetscFunctionBegin;
2564   *nn = n;
2565   if (!ia) PetscFunctionReturn(0);
2566 
2567   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2568   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2569   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2570   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2571   jj   = a->j;
2572   for (i=0; i<nz; i++) {
2573     collengths[jj[i]]++;
2574   }
2575   cia[0] = oshift;
2576   for (i=0; i<n; i++) {
2577     cia[i+1] = cia[i] + collengths[i];
2578   }
2579   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2580   jj   = a->j;
2581   for (row=0; row<m; row++) {
2582     mr = a->i[row+1] - a->i[row];
2583     for (i=0; i<mr; i++) {
2584       col = *jj++;
2585       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2586       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2587     }
2588   }
2589   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2590   *ia    = cia; *ja = cja;
2591   *spidx = cspidx;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2597 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2598 {
2599   PetscErrorCode ierr;
2600 
2601   PetscFunctionBegin;
2602   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2603   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatShift_SeqBAIJ"
2609 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2610 {
2611   PetscErrorCode ierr;
2612   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2613 
2614   PetscFunctionBegin;
2615   if (!Y->preallocated || !aij->nz) {
2616     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2617   }
2618   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 /* -------------------------------------------------------------------*/
2623 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2624                                        MatGetRow_SeqBAIJ,
2625                                        MatRestoreRow_SeqBAIJ,
2626                                        MatMult_SeqBAIJ_N,
2627                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2628                                        MatMultTranspose_SeqBAIJ,
2629                                        MatMultTransposeAdd_SeqBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                /* 10*/ 0,
2634                                        MatLUFactor_SeqBAIJ,
2635                                        0,
2636                                        0,
2637                                        MatTranspose_SeqBAIJ,
2638                                /* 15*/ MatGetInfo_SeqBAIJ,
2639                                        MatEqual_SeqBAIJ,
2640                                        MatGetDiagonal_SeqBAIJ,
2641                                        MatDiagonalScale_SeqBAIJ,
2642                                        MatNorm_SeqBAIJ,
2643                                /* 20*/ 0,
2644                                        MatAssemblyEnd_SeqBAIJ,
2645                                        MatSetOption_SeqBAIJ,
2646                                        MatZeroEntries_SeqBAIJ,
2647                                /* 24*/ MatZeroRows_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                /* 29*/ MatSetUp_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                /* 34*/ MatDuplicate_SeqBAIJ,
2658                                        0,
2659                                        0,
2660                                        MatILUFactor_SeqBAIJ,
2661                                        0,
2662                                /* 39*/ MatAXPY_SeqBAIJ,
2663                                        MatGetSubMatrices_SeqBAIJ,
2664                                        MatIncreaseOverlap_SeqBAIJ,
2665                                        MatGetValues_SeqBAIJ,
2666                                        MatCopy_SeqBAIJ,
2667                                /* 44*/ 0,
2668                                        MatScale_SeqBAIJ,
2669                                        MatShift_SeqBAIJ,
2670                                        0,
2671                                        MatZeroRowsColumns_SeqBAIJ,
2672                                /* 49*/ 0,
2673                                        MatGetRowIJ_SeqBAIJ,
2674                                        MatRestoreRowIJ_SeqBAIJ,
2675                                        MatGetColumnIJ_SeqBAIJ,
2676                                        MatRestoreColumnIJ_SeqBAIJ,
2677                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        MatSetValuesBlocked_SeqBAIJ,
2682                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2683                                        MatDestroy_SeqBAIJ,
2684                                        MatView_SeqBAIJ,
2685                                        0,
2686                                        0,
2687                                /* 64*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2693                                        0,
2694                                        MatConvert_Basic,
2695                                        0,
2696                                        0,
2697                                /* 74*/ 0,
2698                                        MatFDColoringApply_BAIJ,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 79*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        MatLoad_SeqBAIJ,
2707                                /* 84*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 89*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 94*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /* 99*/ 0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                /*104*/ 0,
2728                                        MatRealPart_SeqBAIJ,
2729                                        MatImaginaryPart_SeqBAIJ,
2730                                        0,
2731                                        0,
2732                                /*109*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        MatMissingDiagonal_SeqBAIJ,
2737                                /*114*/ 0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                /*119*/ 0,
2743                                        0,
2744                                        MatMultHermitianTranspose_SeqBAIJ,
2745                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2746                                        0,
2747                                /*124*/ 0,
2748                                        0,
2749                                        MatInvertBlockDiagonal_SeqBAIJ,
2750                                        0,
2751                                        0,
2752                                /*129*/ 0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                /*134*/ 0,
2758                                        0,
2759                                        0,
2760                                        0,
2761                                        0,
2762                                /*139*/ 0,
2763                                        0,
2764                                        0,
2765                                        MatFDColoringSetUp_SeqXAIJ,
2766                                        0,
2767                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2768 };
2769 
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2772 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2773 {
2774   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2775   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2776   PetscErrorCode ierr;
2777 
2778   PetscFunctionBegin;
2779   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2780 
2781   /* allocate space for values if not already there */
2782   if (!aij->saved_values) {
2783     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2784     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2785   }
2786 
2787   /* copy values over */
2788   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 #undef __FUNCT__
2793 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2794 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2795 {
2796   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2797   PetscErrorCode ierr;
2798   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2799 
2800   PetscFunctionBegin;
2801   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2802   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2803 
2804   /* copy values over */
2805   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2810 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2811 
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2814 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2815 {
2816   Mat_SeqBAIJ    *b;
2817   PetscErrorCode ierr;
2818   PetscInt       i,mbs,nbs,bs2;
2819   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2820 
2821   PetscFunctionBegin;
2822   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2823   if (nz == MAT_SKIP_ALLOCATION) {
2824     skipallocation = PETSC_TRUE;
2825     nz             = 0;
2826   }
2827 
2828   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2829   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2830   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2831   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2832 
2833   B->preallocated = PETSC_TRUE;
2834 
2835   mbs = B->rmap->n/bs;
2836   nbs = B->cmap->n/bs;
2837   bs2 = bs*bs;
2838 
2839   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2840 
2841   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2842   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2843   if (nnz) {
2844     for (i=0; i<mbs; i++) {
2845       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2846       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2847     }
2848   }
2849 
2850   b    = (Mat_SeqBAIJ*)B->data;
2851   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2852   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2853   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2854 
2855   if (!flg) {
2856     switch (bs) {
2857     case 1:
2858       B->ops->mult    = MatMult_SeqBAIJ_1;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2860       break;
2861     case 2:
2862       B->ops->mult    = MatMult_SeqBAIJ_2;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2864       break;
2865     case 3:
2866       B->ops->mult    = MatMult_SeqBAIJ_3;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2868       break;
2869     case 4:
2870       B->ops->mult    = MatMult_SeqBAIJ_4;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2872       break;
2873     case 5:
2874       B->ops->mult    = MatMult_SeqBAIJ_5;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2876       break;
2877     case 6:
2878       B->ops->mult    = MatMult_SeqBAIJ_6;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2880       break;
2881     case 7:
2882       B->ops->mult    = MatMult_SeqBAIJ_7;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2884       break;
2885     case 15:
2886       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2888       break;
2889     default:
2890       B->ops->mult    = MatMult_SeqBAIJ_N;
2891       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2892       break;
2893     }
2894   }
2895   B->ops->sor = MatSOR_SeqBAIJ;
2896   b->mbs = mbs;
2897   b->nbs = nbs;
2898   if (!skipallocation) {
2899     if (!b->imax) {
2900       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2901       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2902 
2903       b->free_imax_ilen = PETSC_TRUE;
2904     }
2905     /* b->ilen will count nonzeros in each block row so far. */
2906     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2907     if (!nnz) {
2908       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2909       else if (nz < 0) nz = 1;
2910       for (i=0; i<mbs; i++) b->imax[i] = nz;
2911       nz = nz*mbs;
2912     } else {
2913       nz = 0;
2914       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2915     }
2916 
2917     /* allocate the matrix space */
2918     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2919     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2920     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2921     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2922     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2923 
2924     b->singlemalloc = PETSC_TRUE;
2925     b->i[0]         = 0;
2926     for (i=1; i<mbs+1; i++) {
2927       b->i[i] = b->i[i-1] + b->imax[i-1];
2928     }
2929     b->free_a  = PETSC_TRUE;
2930     b->free_ij = PETSC_TRUE;
2931   } else {
2932     b->free_a  = PETSC_FALSE;
2933     b->free_ij = PETSC_FALSE;
2934   }
2935 
2936   b->bs2              = bs2;
2937   b->mbs              = mbs;
2938   b->nz               = 0;
2939   b->maxnz            = nz;
2940   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2941   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2948 {
2949   PetscInt       i,m,nz,nz_max=0,*nnz;
2950   PetscScalar    *values=0;
2951   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2956   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2961   m    = B->rmap->n/bs;
2962 
2963   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2964   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2965   for (i=0; i<m; i++) {
2966     nz = ii[i+1]- ii[i];
2967     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2968     nz_max = PetscMax(nz_max, nz);
2969     nnz[i] = nz;
2970   }
2971   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2972   ierr = PetscFree(nnz);CHKERRQ(ierr);
2973 
2974   values = (PetscScalar*)V;
2975   if (!values) {
2976     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2977   }
2978   for (i=0; i<m; i++) {
2979     PetscInt          ncols  = ii[i+1] - ii[i];
2980     const PetscInt    *icols = jj + ii[i];
2981     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2982     if (!roworiented) {
2983       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2984     } else {
2985       PetscInt j;
2986       for (j=0; j<ncols; j++) {
2987         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2988         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2989       }
2990     }
2991   }
2992   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 /*MC
3000    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3001    block sparse compressed row format.
3002 
3003    Options Database Keys:
3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3005 
3006   Level: beginner
3007 
3008 .seealso: MatCreateSeqBAIJ()
3009 M*/
3010 
3011 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatCreate_SeqBAIJ"
3015 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3016 {
3017   PetscErrorCode ierr;
3018   PetscMPIInt    size;
3019   Mat_SeqBAIJ    *b;
3020 
3021   PetscFunctionBegin;
3022   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3023   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3024 
3025   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3026   B->data = (void*)b;
3027   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3028 
3029   b->row          = 0;
3030   b->col          = 0;
3031   b->icol         = 0;
3032   b->reallocs     = 0;
3033   b->saved_values = 0;
3034 
3035   b->roworiented        = PETSC_TRUE;
3036   b->nonew              = 0;
3037   b->diag               = 0;
3038   B->spptr              = 0;
3039   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3040   b->keepnonzeropattern = PETSC_FALSE;
3041 
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3059 {
3060   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3061   PetscErrorCode ierr;
3062   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3066 
3067   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3068     c->imax           = a->imax;
3069     c->ilen           = a->ilen;
3070     c->free_imax_ilen = PETSC_FALSE;
3071   } else {
3072     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3073     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3074     for (i=0; i<mbs; i++) {
3075       c->imax[i] = a->imax[i];
3076       c->ilen[i] = a->ilen[i];
3077     }
3078     c->free_imax_ilen = PETSC_TRUE;
3079   }
3080 
3081   /* allocate the matrix space */
3082   if (mallocmatspace) {
3083     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3085       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3086 
3087       c->i            = a->i;
3088       c->j            = a->j;
3089       c->singlemalloc = PETSC_FALSE;
3090       c->free_a       = PETSC_TRUE;
3091       c->free_ij      = PETSC_FALSE;
3092       c->parent       = A;
3093       C->preallocated = PETSC_TRUE;
3094       C->assembled    = PETSC_TRUE;
3095 
3096       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3097       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3099     } else {
3100       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3101       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3102 
3103       c->singlemalloc = PETSC_TRUE;
3104       c->free_a       = PETSC_TRUE;
3105       c->free_ij      = PETSC_TRUE;
3106 
3107       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3108       if (mbs > 0) {
3109         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3110         if (cpvalues == MAT_COPY_VALUES) {
3111           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3112         } else {
3113           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3114         }
3115       }
3116       C->preallocated = PETSC_TRUE;
3117       C->assembled    = PETSC_TRUE;
3118     }
3119   }
3120 
3121   c->roworiented = a->roworiented;
3122   c->nonew       = a->nonew;
3123 
3124   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3125   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3126 
3127   c->bs2         = a->bs2;
3128   c->mbs         = a->mbs;
3129   c->nbs         = a->nbs;
3130 
3131   if (a->diag) {
3132     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3133       c->diag      = a->diag;
3134       c->free_diag = PETSC_FALSE;
3135     } else {
3136       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3137       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3138       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3139       c->free_diag = PETSC_TRUE;
3140     }
3141   } else c->diag = 0;
3142 
3143   c->nz         = a->nz;
3144   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3145   c->solve_work = NULL;
3146   c->mult_work  = NULL;
3147   c->sor_workt  = NULL;
3148   c->sor_work   = NULL;
3149 
3150   c->compressedrow.use   = a->compressedrow.use;
3151   c->compressedrow.nrows = a->compressedrow.nrows;
3152   if (a->compressedrow.use) {
3153     i    = a->compressedrow.nrows;
3154     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3155     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3157     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3158   } else {
3159     c->compressedrow.use    = PETSC_FALSE;
3160     c->compressedrow.i      = NULL;
3161     c->compressedrow.rindex = NULL;
3162   }
3163   C->nonzerostate = A->nonzerostate;
3164 
3165   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3166   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3167   PetscFunctionReturn(0);
3168 }
3169 
3170 #undef __FUNCT__
3171 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3172 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3173 {
3174   PetscErrorCode ierr;
3175 
3176   PetscFunctionBegin;
3177   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3178   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3179   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3180   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatLoad_SeqBAIJ"
3186 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3187 {
3188   Mat_SeqBAIJ    *a;
3189   PetscErrorCode ierr;
3190   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3191   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3192   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3193   PetscInt       *masked,nmask,tmp,bs2,ishift;
3194   PetscMPIInt    size;
3195   int            fd;
3196   PetscScalar    *aa;
3197   MPI_Comm       comm;
3198 
3199   PetscFunctionBegin;
3200   /* force binary viewer to load .info file if it has not yet done so */
3201   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3202   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3203   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3204   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3205   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3206   if (bs < 0) bs = 1;
3207   bs2  = bs*bs;
3208 
3209   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3210   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3211   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3212   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3213   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3214   M = header[1]; N = header[2]; nz = header[3];
3215 
3216   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3217   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3218 
3219   /*
3220      This code adds extra rows to make sure the number of rows is
3221     divisible by the blocksize
3222   */
3223   mbs        = M/bs;
3224   extra_rows = bs - M + bs*(mbs);
3225   if (extra_rows == bs) extra_rows = 0;
3226   else mbs++;
3227   if (extra_rows) {
3228     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3229   }
3230 
3231   /* Set global sizes if not already set */
3232   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3233     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3234   } else { /* Check if the matrix global sizes are correct */
3235     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3236     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3237       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3238     }
3239     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3240   }
3241 
3242   /* read in row lengths */
3243   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3246 
3247   /* read in column indices */
3248   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3249   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3250   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3251 
3252   /* loop over row lengths determining block row lengths */
3253   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3254   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3255   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3256   rowcount = 0;
3257   nzcount  = 0;
3258   for (i=0; i<mbs; i++) {
3259     nmask = 0;
3260     for (j=0; j<bs; j++) {
3261       kmax = rowlengths[rowcount];
3262       for (k=0; k<kmax; k++) {
3263         tmp = jj[nzcount++]/bs;
3264         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3265       }
3266       rowcount++;
3267     }
3268     browlengths[i] += nmask;
3269     /* zero out the mask elements we set */
3270     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3271   }
3272 
3273   /* Do preallocation  */
3274   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3275   a    = (Mat_SeqBAIJ*)newmat->data;
3276 
3277   /* set matrix "i" values */
3278   a->i[0] = 0;
3279   for (i=1; i<= mbs; i++) {
3280     a->i[i]      = a->i[i-1] + browlengths[i-1];
3281     a->ilen[i-1] = browlengths[i-1];
3282   }
3283   a->nz = 0;
3284   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3285 
3286   /* read in nonzero values */
3287   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3288   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3289   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3290 
3291   /* set "a" and "j" values into matrix */
3292   nzcount = 0; jcount = 0;
3293   for (i=0; i<mbs; i++) {
3294     nzcountb = nzcount;
3295     nmask    = 0;
3296     for (j=0; j<bs; j++) {
3297       kmax = rowlengths[i*bs+j];
3298       for (k=0; k<kmax; k++) {
3299         tmp = jj[nzcount++]/bs;
3300         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3301       }
3302     }
3303     /* sort the masked values */
3304     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3305 
3306     /* set "j" values into matrix */
3307     maskcount = 1;
3308     for (j=0; j<nmask; j++) {
3309       a->j[jcount++]  = masked[j];
3310       mask[masked[j]] = maskcount++;
3311     }
3312     /* set "a" values into matrix */
3313     ishift = bs2*a->i[i];
3314     for (j=0; j<bs; j++) {
3315       kmax = rowlengths[i*bs+j];
3316       for (k=0; k<kmax; k++) {
3317         tmp       = jj[nzcountb]/bs;
3318         block     = mask[tmp] - 1;
3319         point     = jj[nzcountb] - bs*tmp;
3320         idx       = ishift + bs2*block + j + bs*point;
3321         a->a[idx] = (MatScalar)aa[nzcountb++];
3322       }
3323     }
3324     /* zero out the mask elements we set */
3325     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3326   }
3327   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3328 
3329   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3330   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3331   ierr = PetscFree(aa);CHKERRQ(ierr);
3332   ierr = PetscFree(jj);CHKERRQ(ierr);
3333   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3334 
3335   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   PetscFunctionReturn(0);
3338 }
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatCreateSeqBAIJ"
3342 /*@C
3343    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3344    compressed row) format.  For good matrix assembly performance the
3345    user should preallocate the matrix storage by setting the parameter nz
3346    (or the array nnz).  By setting these parameters accurately, performance
3347    during matrix assembly can be increased by more than a factor of 50.
3348 
3349    Collective on MPI_Comm
3350 
3351    Input Parameters:
3352 +  comm - MPI communicator, set to PETSC_COMM_SELF
3353 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3354           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3355 .  m - number of rows
3356 .  n - number of columns
3357 .  nz - number of nonzero blocks  per block row (same for all rows)
3358 -  nnz - array containing the number of nonzero blocks in the various block rows
3359          (possibly different for each block row) or NULL
3360 
3361    Output Parameter:
3362 .  A - the matrix
3363 
3364    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3365    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3366    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3367 
3368    Options Database Keys:
3369 .   -mat_no_unroll - uses code that does not unroll the loops in the
3370                      block calculations (much slower)
3371 .    -mat_block_size - size of the blocks to use
3372 
3373    Level: intermediate
3374 
3375    Notes:
3376    The number of rows and columns must be divisible by blocksize.
3377 
3378    If the nnz parameter is given then the nz parameter is ignored
3379 
3380    A nonzero block is any block that as 1 or more nonzeros in it
3381 
3382    The block AIJ format is fully compatible with standard Fortran 77
3383    storage.  That is, the stored row and column indices can begin at
3384    either one (as in Fortran) or zero.  See the users' manual for details.
3385 
3386    Specify the preallocated storage with either nz or nnz (not both).
3387    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3388    allocation.  See Users-Manual: ch_mat for details.
3389    matrices.
3390 
3391 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3392 @*/
3393 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3394 {
3395   PetscErrorCode ierr;
3396 
3397   PetscFunctionBegin;
3398   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3399   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3400   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3401   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3407 /*@C
3408    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3409    per row in the matrix. For good matrix assembly performance the
3410    user should preallocate the matrix storage by setting the parameter nz
3411    (or the array nnz).  By setting these parameters accurately, performance
3412    during matrix assembly can be increased by more than a factor of 50.
3413 
3414    Collective on MPI_Comm
3415 
3416    Input Parameters:
3417 +  B - the matrix
3418 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3419           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3420 .  nz - number of block nonzeros per block row (same for all rows)
3421 -  nnz - array containing the number of block nonzeros in the various block rows
3422          (possibly different for each block row) or NULL
3423 
3424    Options Database Keys:
3425 .   -mat_no_unroll - uses code that does not unroll the loops in the
3426                      block calculations (much slower)
3427 .   -mat_block_size - size of the blocks to use
3428 
3429    Level: intermediate
3430 
3431    Notes:
3432    If the nnz parameter is given then the nz parameter is ignored
3433 
3434    You can call MatGetInfo() to get information on how effective the preallocation was;
3435    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3436    You can also run with the option -info and look for messages with the string
3437    malloc in them to see if additional memory allocation was needed.
3438 
3439    The block AIJ format is fully compatible with standard Fortran 77
3440    storage.  That is, the stored row and column indices can begin at
3441    either one (as in Fortran) or zero.  See the users' manual for details.
3442 
3443    Specify the preallocated storage with either nz or nnz (not both).
3444    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3445    allocation.  See Users-Manual: ch_mat for details.
3446 
3447 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3448 @*/
3449 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3450 {
3451   PetscErrorCode ierr;
3452 
3453   PetscFunctionBegin;
3454   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3455   PetscValidType(B,1);
3456   PetscValidLogicalCollectiveInt(B,bs,2);
3457   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3458   PetscFunctionReturn(0);
3459 }
3460 
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3463 /*@C
3464    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3465    (the default sequential PETSc format).
3466 
3467    Collective on MPI_Comm
3468 
3469    Input Parameters:
3470 +  B - the matrix
3471 .  i - the indices into j for the start of each local row (starts with zero)
3472 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3473 -  v - optional values in the matrix
3474 
3475    Level: developer
3476 
3477    Notes:
3478    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3479    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3480    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3481    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3482    block column and the second index is over columns within a block.
3483 
3484 .keywords: matrix, aij, compressed row, sparse
3485 
3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3487 @*/
3488 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3489 {
3490   PetscErrorCode ierr;
3491 
3492   PetscFunctionBegin;
3493   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3494   PetscValidType(B,1);
3495   PetscValidLogicalCollectiveInt(B,bs,2);
3496   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3503 /*@
3504      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3505 
3506      Collective on MPI_Comm
3507 
3508    Input Parameters:
3509 +  comm - must be an MPI communicator of size 1
3510 .  bs - size of block
3511 .  m - number of rows
3512 .  n - number of columns
3513 .  i - row indices
3514 .  j - column indices
3515 -  a - matrix values
3516 
3517    Output Parameter:
3518 .  mat - the matrix
3519 
3520    Level: advanced
3521 
3522    Notes:
3523        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3524     once the matrix is destroyed
3525 
3526        You cannot set new nonzero locations into this matrix, that will generate an error.
3527 
3528        The i and j indices are 0 based
3529 
3530        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3531 
3532       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3533       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3534       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3535       with column-major ordering within blocks.
3536 
3537 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3538 
3539 @*/
3540 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3541 {
3542   PetscErrorCode ierr;
3543   PetscInt       ii;
3544   Mat_SeqBAIJ    *baij;
3545 
3546   PetscFunctionBegin;
3547   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3548   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3549 
3550   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3551   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3552   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3553   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3554   baij = (Mat_SeqBAIJ*)(*mat)->data;
3555   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3556   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3557 
3558   baij->i = i;
3559   baij->j = j;
3560   baij->a = a;
3561 
3562   baij->singlemalloc = PETSC_FALSE;
3563   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3564   baij->free_a       = PETSC_FALSE;
3565   baij->free_ij      = PETSC_FALSE;
3566 
3567   for (ii=0; ii<m; ii++) {
3568     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3569 #if defined(PETSC_USE_DEBUG)
3570     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3571 #endif
3572   }
3573 #if defined(PETSC_USE_DEBUG)
3574   for (ii=0; ii<baij->i[m]; ii++) {
3575     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3576     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3577   }
3578 #endif
3579 
3580   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3581   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582   PetscFunctionReturn(0);
3583 }
3584 
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3588 {
3589   PetscErrorCode ierr;
3590 
3591   PetscFunctionBegin;
3592   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
3595