xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a455e9261d77363e13c8b22a0e0ea831d71fb0ff)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
46       diag    += 1;
47       mdiag   += 1;
48     }
49     break;
50   case 2:
51     for (i=0; i<mbs; i++) {
52       odiag    = v + 4*diag_offset[i];
53       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
54       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
55       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
56       if (zeropivotdetected) break;
57       diag    += 4;
58       mdiag   += 4;
59     }
60     break;
61   case 3:
62     for (i=0; i<mbs; i++) {
63       odiag    = v + 9*diag_offset[i];
64       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
65       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
66       diag[8]  = odiag[8];
67       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
68       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
69       mdiag[8] = odiag[8];
70       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
71       if (zeropivotdetected) break;
72       diag    += 9;
73       mdiag   += 9;
74     }
75     break;
76   case 4:
77     for (i=0; i<mbs; i++) {
78       odiag  = v + 16*diag_offset[i];
79       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
80       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
81       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
82       if (zeropivotdetected) break;
83       diag  += 16;
84       mdiag += 16;
85     }
86     break;
87   case 5:
88     for (i=0; i<mbs; i++) {
89       odiag  = v + 25*diag_offset[i];
90       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
91       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
92       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
93       if (zeropivotdetected) break;
94       diag  += 25;
95       mdiag += 25;
96     }
97     break;
98   case 6:
99     for (i=0; i<mbs; i++) {
100       odiag  = v + 36*diag_offset[i];
101       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
104       if (zeropivotdetected) break;
105       diag  += 36;
106       mdiag += 36;
107     }
108     break;
109   case 7:
110     for (i=0; i<mbs; i++) {
111       odiag  = v + 49*diag_offset[i];
112       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
113       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) break;
116       diag  += 49;
117       mdiag += 49;
118     }
119     break;
120   default:
121     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122     for (i=0; i<mbs; i++) {
123       odiag  = v + bs2*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
127       diag  += bs2;
128       mdiag += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   if (zeropivotdetected) {
134     A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
135   }
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "MatSOR_SeqBAIJ"
141 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
142 {
143   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
144   PetscScalar       *x,*work,*w,*workt,*t;
145   const MatScalar   *v,*aa = a->a, *idiag;
146   const PetscScalar *b,*xb;
147   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
148   PetscErrorCode    ierr;
149   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
150   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
151 
152   PetscFunctionBegin;
153   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
154   its = its*lits;
155   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
156   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
157   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
158   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
159   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
160 
161   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
162 
163   if (!m) PetscFunctionReturn(0);
164   diag  = a->diag;
165   idiag = a->idiag;
166   k    = PetscMax(A->rmap->n,A->cmap->n);
167   if (!a->mult_work) {
168     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
169   }
170   if (!a->sor_workt) {
171     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
172   }
173   if (!a->sor_work) {
174     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
175   }
176   work = a->mult_work;
177   t    = a->sor_workt;
178   w    = a->sor_work;
179 
180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182 
183   if (flag & SOR_ZERO_INITIAL_GUESS) {
184     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
185       switch (bs) {
186       case 1:
187         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
188         t[0] = b[0];
189         i2     = 1;
190         idiag += 1;
191         for (i=1; i<m; i++) {
192           v  = aa + ai[i];
193           vi = aj + ai[i];
194           nz = diag[i] - ai[i];
195           s[0] = b[i2];
196           for (j=0; j<nz; j++) {
197             xw[0] = x[vi[j]];
198             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
199           }
200           t[i2] = s[0];
201           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
202           x[i2]  = xw[0];
203           idiag += 1;
204           i2    += 1;
205         }
206         break;
207       case 2:
208         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
209         t[0] = b[0]; t[1] = b[1];
210         i2     = 2;
211         idiag += 4;
212         for (i=1; i<m; i++) {
213           v  = aa + 4*ai[i];
214           vi = aj + ai[i];
215           nz = diag[i] - ai[i];
216           s[0] = b[i2]; s[1] = b[i2+1];
217           for (j=0; j<nz; j++) {
218             idx = 2*vi[j];
219             it  = 4*j;
220             xw[0] = x[idx]; xw[1] = x[1+idx];
221             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
222           }
223           t[i2] = s[0]; t[i2+1] = s[1];
224           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
225           x[i2]   = xw[0]; x[i2+1] = xw[1];
226           idiag  += 4;
227           i2     += 2;
228         }
229         break;
230       case 3:
231         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
232         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
233         i2     = 3;
234         idiag += 9;
235         for (i=1; i<m; i++) {
236           v  = aa + 9*ai[i];
237           vi = aj + ai[i];
238           nz = diag[i] - ai[i];
239           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
240           while (nz--) {
241             idx = 3*(*vi++);
242             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
243             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
244             v  += 9;
245           }
246           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
247           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
248           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
249           idiag  += 9;
250           i2     += 3;
251         }
252         break;
253       case 4:
254         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
255         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
256         i2     = 4;
257         idiag += 16;
258         for (i=1; i<m; i++) {
259           v  = aa + 16*ai[i];
260           vi = aj + ai[i];
261           nz = diag[i] - ai[i];
262           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
263           while (nz--) {
264             idx = 4*(*vi++);
265             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
266             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
267             v  += 16;
268           }
269           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
270           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
271           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
272           idiag  += 16;
273           i2     += 4;
274         }
275         break;
276       case 5:
277         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
278         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
279         i2     = 5;
280         idiag += 25;
281         for (i=1; i<m; i++) {
282           v  = aa + 25*ai[i];
283           vi = aj + ai[i];
284           nz = diag[i] - ai[i];
285           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
286           while (nz--) {
287             idx = 5*(*vi++);
288             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
289             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
290             v  += 25;
291           }
292           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
293           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
294           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
295           idiag  += 25;
296           i2     += 5;
297         }
298         break;
299       case 6:
300         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
301         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
302         i2     = 6;
303         idiag += 36;
304         for (i=1; i<m; i++) {
305           v  = aa + 36*ai[i];
306           vi = aj + ai[i];
307           nz = diag[i] - ai[i];
308           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
309           while (nz--) {
310             idx = 6*(*vi++);
311             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
312             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
313             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
314             v  += 36;
315           }
316           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
317           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
318           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
319           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
320           idiag  += 36;
321           i2     += 6;
322         }
323         break;
324       case 7:
325         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
326         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
327         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
328         i2     = 7;
329         idiag += 49;
330         for (i=1; i<m; i++) {
331           v  = aa + 49*ai[i];
332           vi = aj + ai[i];
333           nz = diag[i] - ai[i];
334           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
335           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
336           while (nz--) {
337             idx = 7*(*vi++);
338             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
339             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
340             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
341             v  += 49;
342           }
343           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
344           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
345           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
346           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
347           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
348           idiag  += 49;
349           i2     += 7;
350         }
351         break;
352       default:
353         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
354         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355         i2     = bs;
356         idiag += bs2;
357         for (i=1; i<m; i++) {
358           v  = aa + bs2*ai[i];
359           vi = aj + ai[i];
360           nz = diag[i] - ai[i];
361 
362           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363           /* copy all rows of x that are needed into contiguous space */
364           workt = work;
365           for (j=0; j<nz; j++) {
366             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
367             workt += bs;
368           }
369           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
370           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
372 
373           idiag += bs2;
374           i2    += bs;
375         }
376         break;
377       }
378       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
379       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
380       xb = t;
381     }
382     else xb = b;
383     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
384       idiag = a->idiag+bs2*(a->mbs-1);
385       i2 = bs * (m-1);
386       switch (bs) {
387       case 1:
388         s[0]  = xb[i2];
389         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
390         x[i2] = xw[0];
391         i2   -= 1;
392         for (i=m-2; i>=0; i--) {
393           v  = aa + (diag[i]+1);
394           vi = aj + diag[i] + 1;
395           nz = ai[i+1] - diag[i] - 1;
396           s[0] = xb[i2];
397           for (j=0; j<nz; j++) {
398             xw[0] = x[vi[j]];
399             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
400           }
401           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402           x[i2]  = xw[0];
403           idiag -= 1;
404           i2    -= 1;
405         }
406         break;
407       case 2:
408         s[0]  = xb[i2]; s[1] = xb[i2+1];
409         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
410         x[i2] = xw[0]; x[i2+1] = xw[1];
411         i2    -= 2;
412         idiag -= 4;
413         for (i=m-2; i>=0; i--) {
414           v  = aa + 4*(diag[i] + 1);
415           vi = aj + diag[i] + 1;
416           nz = ai[i+1] - diag[i] - 1;
417           s[0] = xb[i2]; s[1] = xb[i2+1];
418           for (j=0; j<nz; j++) {
419             idx = 2*vi[j];
420             it  = 4*j;
421             xw[0] = x[idx]; xw[1] = x[1+idx];
422             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
423           }
424           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
425           x[i2]   = xw[0]; x[i2+1] = xw[1];
426           idiag  -= 4;
427           i2     -= 2;
428         }
429         break;
430       case 3:
431         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
432         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
433         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
434         i2    -= 3;
435         idiag -= 9;
436         for (i=m-2; i>=0; i--) {
437           v  = aa + 9*(diag[i]+1);
438           vi = aj + diag[i] + 1;
439           nz = ai[i+1] - diag[i] - 1;
440           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
441           while (nz--) {
442             idx = 3*(*vi++);
443             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
444             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
445             v  += 9;
446           }
447           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
448           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
449           idiag  -= 9;
450           i2     -= 3;
451         }
452         break;
453       case 4:
454         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
455         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
456         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
457         i2    -= 4;
458         idiag -= 16;
459         for (i=m-2; i>=0; i--) {
460           v  = aa + 16*(diag[i]+1);
461           vi = aj + diag[i] + 1;
462           nz = ai[i+1] - diag[i] - 1;
463           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
464           while (nz--) {
465             idx = 4*(*vi++);
466             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
467             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
468             v  += 16;
469           }
470           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
471           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
472           idiag  -= 16;
473           i2     -= 4;
474         }
475         break;
476       case 5:
477         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
478         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
479         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
480         i2    -= 5;
481         idiag -= 25;
482         for (i=m-2; i>=0; i--) {
483           v  = aa + 25*(diag[i]+1);
484           vi = aj + diag[i] + 1;
485           nz = ai[i+1] - diag[i] - 1;
486           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
487           while (nz--) {
488             idx = 5*(*vi++);
489             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
490             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
491             v  += 25;
492           }
493           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
494           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
495           idiag  -= 25;
496           i2     -= 5;
497         }
498         break;
499       case 6:
500         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
501         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503         i2    -= 6;
504         idiag -= 36;
505         for (i=m-2; i>=0; i--) {
506           v  = aa + 36*(diag[i]+1);
507           vi = aj + diag[i] + 1;
508           nz = ai[i+1] - diag[i] - 1;
509           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
510           while (nz--) {
511             idx = 6*(*vi++);
512             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
513             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
514             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
515             v  += 36;
516           }
517           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
518           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
519           idiag  -= 36;
520           i2     -= 6;
521         }
522         break;
523       case 7:
524         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
525         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
526         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
527         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
528         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
529         i2    -= 7;
530         idiag -= 49;
531         for (i=m-2; i>=0; i--) {
532           v  = aa + 49*(diag[i]+1);
533           vi = aj + diag[i] + 1;
534           nz = ai[i+1] - diag[i] - 1;
535           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
536           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
537           while (nz--) {
538             idx = 7*(*vi++);
539             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
540             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
541             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
542             v  += 49;
543           }
544           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
545           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
546           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
547           idiag  -= 49;
548           i2     -= 7;
549         }
550         break;
551       default:
552         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
553         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554         i2    -= bs;
555         idiag -= bs2;
556         for (i=m-2; i>=0; i--) {
557           v  = aa + bs2*(diag[i]+1);
558           vi = aj + diag[i] + 1;
559           nz = ai[i+1] - diag[i] - 1;
560 
561           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
562           /* copy all rows of x that are needed into contiguous space */
563           workt = work;
564           for (j=0; j<nz; j++) {
565             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
566             workt += bs;
567           }
568           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
569           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
570 
571           idiag -= bs2;
572           i2    -= bs;
573         }
574         break;
575       }
576       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
577     }
578     its--;
579   }
580   while (its--) {
581     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
582       idiag = a->idiag;
583       i2 = 0;
584       switch (bs) {
585       case 1:
586         for (i=0; i<m; i++) {
587           v  = aa + ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2];
591           for (j=0; j<nz; j++) {
592             xw[0] = x[vi[j]];
593             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
594           }
595           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
596           x[i2] += xw[0];
597           idiag += 1;
598           i2    += 1;
599         }
600         break;
601       case 2:
602         for (i=0; i<m; i++) {
603           v  = aa + 4*ai[i];
604           vi = aj + ai[i];
605           nz = ai[i+1] - ai[i];
606           s[0] = b[i2]; s[1] = b[i2+1];
607           for (j=0; j<nz; j++) {
608             idx = 2*vi[j];
609             it  = 4*j;
610             xw[0] = x[idx]; xw[1] = x[1+idx];
611             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
612           }
613           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
614           x[i2]  += xw[0]; x[i2+1] += xw[1];
615           idiag  += 4;
616           i2     += 2;
617         }
618         break;
619       case 3:
620         for (i=0; i<m; i++) {
621           v  = aa + 9*ai[i];
622           vi = aj + ai[i];
623           nz = ai[i+1] - ai[i];
624           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
625           while (nz--) {
626             idx = 3*(*vi++);
627             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
628             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
629             v  += 9;
630           }
631           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
632           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
633           idiag  += 9;
634           i2     += 3;
635         }
636         break;
637       case 4:
638         for (i=0; i<m; i++) {
639           v  = aa + 16*ai[i];
640           vi = aj + ai[i];
641           nz = ai[i+1] - ai[i];
642           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
643           while (nz--) {
644             idx = 4*(*vi++);
645             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
646             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
647             v  += 16;
648           }
649           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
650           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
651           idiag  += 16;
652           i2     += 4;
653         }
654         break;
655       case 5:
656         for (i=0; i<m; i++) {
657           v  = aa + 25*ai[i];
658           vi = aj + ai[i];
659           nz = ai[i+1] - ai[i];
660           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
661           while (nz--) {
662             idx = 5*(*vi++);
663             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
664             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
665             v  += 25;
666           }
667           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
668           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
669           idiag  += 25;
670           i2     += 5;
671         }
672         break;
673       case 6:
674         for (i=0; i<m; i++) {
675           v  = aa + 36*ai[i];
676           vi = aj + ai[i];
677           nz = ai[i+1] - ai[i];
678           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
679           while (nz--) {
680             idx = 6*(*vi++);
681             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
682             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
683             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
684             v  += 36;
685           }
686           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
687           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
688           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
689           idiag  += 36;
690           i2     += 6;
691         }
692         break;
693       case 7:
694         for (i=0; i<m; i++) {
695           v  = aa + 49*ai[i];
696           vi = aj + ai[i];
697           nz = ai[i+1] - ai[i];
698           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
699           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
700           while (nz--) {
701             idx = 7*(*vi++);
702             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
703             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
704             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
705             v  += 49;
706           }
707           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
708           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
709           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
710           idiag  += 49;
711           i2     += 7;
712         }
713         break;
714       default:
715         for (i=0; i<m; i++) {
716           v  = aa + bs2*ai[i];
717           vi = aj + ai[i];
718           nz = ai[i+1] - ai[i];
719 
720           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
721           /* copy all rows of x that are needed into contiguous space */
722           workt = work;
723           for (j=0; j<nz; j++) {
724             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
725             workt += bs;
726           }
727           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
728           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
729 
730           idiag += bs2;
731           i2    += bs;
732         }
733         break;
734       }
735       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
736     }
737     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
738       idiag = a->idiag+bs2*(a->mbs-1);
739       i2 = bs * (m-1);
740       switch (bs) {
741       case 1:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2];
747           for (j=0; j<nz; j++) {
748             xw[0] = x[vi[j]];
749             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
750           }
751           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
752           x[i2] += xw[0];
753           idiag -= 1;
754           i2    -= 1;
755         }
756         break;
757       case 2:
758         for (i=m-1; i>=0; i--) {
759           v  = aa + 4*ai[i];
760           vi = aj + ai[i];
761           nz = ai[i+1] - ai[i];
762           s[0] = b[i2]; s[1] = b[i2+1];
763           for (j=0; j<nz; j++) {
764             idx = 2*vi[j];
765             it  = 4*j;
766             xw[0] = x[idx]; xw[1] = x[1+idx];
767             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
768           }
769           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
770           x[i2]  += xw[0]; x[i2+1] += xw[1];
771           idiag  -= 4;
772           i2     -= 2;
773         }
774         break;
775       case 3:
776         for (i=m-1; i>=0; i--) {
777           v  = aa + 9*ai[i];
778           vi = aj + ai[i];
779           nz = ai[i+1] - ai[i];
780           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
781           while (nz--) {
782             idx = 3*(*vi++);
783             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
784             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
785             v  += 9;
786           }
787           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
788           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
789           idiag  -= 9;
790           i2     -= 3;
791         }
792         break;
793       case 4:
794         for (i=m-1; i>=0; i--) {
795           v  = aa + 16*ai[i];
796           vi = aj + ai[i];
797           nz = ai[i+1] - ai[i];
798           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
799           while (nz--) {
800             idx = 4*(*vi++);
801             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
802             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
803             v  += 16;
804           }
805           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
806           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
807           idiag  -= 16;
808           i2     -= 4;
809         }
810         break;
811       case 5:
812         for (i=m-1; i>=0; i--) {
813           v  = aa + 25*ai[i];
814           vi = aj + ai[i];
815           nz = ai[i+1] - ai[i];
816           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
817           while (nz--) {
818             idx = 5*(*vi++);
819             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
820             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
821             v  += 25;
822           }
823           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
824           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
825           idiag  -= 25;
826           i2     -= 5;
827         }
828         break;
829       case 6:
830         for (i=m-1; i>=0; i--) {
831           v  = aa + 36*ai[i];
832           vi = aj + ai[i];
833           nz = ai[i+1] - ai[i];
834           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
835           while (nz--) {
836             idx = 6*(*vi++);
837             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
838             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
839             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
840             v  += 36;
841           }
842           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
843           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
844           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
845           idiag  -= 36;
846           i2     -= 6;
847         }
848         break;
849       case 7:
850         for (i=m-1; i>=0; i--) {
851           v  = aa + 49*ai[i];
852           vi = aj + ai[i];
853           nz = ai[i+1] - ai[i];
854           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
855           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
856           while (nz--) {
857             idx = 7*(*vi++);
858             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
859             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
860             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
861             v  += 49;
862           }
863           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
864           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
865           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
866           idiag  -= 49;
867           i2     -= 7;
868         }
869         break;
870       default:
871         for (i=m-1; i>=0; i--) {
872           v  = aa + bs2*ai[i];
873           vi = aj + ai[i];
874           nz = ai[i+1] - ai[i];
875 
876           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
877           /* copy all rows of x that are needed into contiguous space */
878           workt = work;
879           for (j=0; j<nz; j++) {
880             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
881             workt += bs;
882           }
883           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
884           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
885 
886           idiag -= bs2;
887           i2    -= bs;
888         }
889         break;
890       }
891       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
892     }
893   }
894   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
895   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 
899 
900 /*
901     Special version for direct calls from Fortran (Used in PETSc-fun3d)
902 */
903 #if defined(PETSC_HAVE_FORTRAN_CAPS)
904 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
905 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
906 #define matsetvaluesblocked4_ matsetvaluesblocked4
907 #endif
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "matsetvaluesblocked4_"
911 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
912 {
913   Mat               A  = *AA;
914   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
915   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
916   PetscInt          *ai    =a->i,*ailen=a->ilen;
917   PetscInt          *aj    =a->j,stepval,lastcol = -1;
918   const PetscScalar *value = v;
919   MatScalar         *ap,*aa = a->a,*bap;
920 
921   PetscFunctionBegin;
922   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
923   stepval = (n-1)*4;
924   for (k=0; k<m; k++) { /* loop over added rows */
925     row  = im[k];
926     rp   = aj + ai[row];
927     ap   = aa + 16*ai[row];
928     nrow = ailen[row];
929     low  = 0;
930     high = nrow;
931     for (l=0; l<n; l++) { /* loop over added columns */
932       col = in[l];
933       if (col <= lastcol)  low = 0;
934       else                high = nrow;
935       lastcol = col;
936       value   = v + k*(stepval+4 + l)*4;
937       while (high-low > 7) {
938         t = (low+high)/2;
939         if (rp[t] > col) high = t;
940         else             low  = t;
941       }
942       for (i=low; i<high; i++) {
943         if (rp[i] > col) break;
944         if (rp[i] == col) {
945           bap = ap +  16*i;
946           for (ii=0; ii<4; ii++,value+=stepval) {
947             for (jj=ii; jj<16; jj+=4) {
948               bap[jj] += *value++;
949             }
950           }
951           goto noinsert2;
952         }
953       }
954       N = nrow++ - 1;
955       high++; /* added new column index thus must search to one higher than before */
956       /* shift up all the later entries in this row */
957       for (ii=N; ii>=i; ii--) {
958         rp[ii+1] = rp[ii];
959         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
960       }
961       if (N >= i) {
962         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
963       }
964       rp[i] = col;
965       bap   = ap +  16*i;
966       for (ii=0; ii<4; ii++,value+=stepval) {
967         for (jj=ii; jj<16; jj+=4) {
968           bap[jj] = *value++;
969         }
970       }
971       noinsert2:;
972       low = i;
973     }
974     ailen[row] = nrow;
975   }
976   PetscFunctionReturnVoid();
977 }
978 
979 #if defined(PETSC_HAVE_FORTRAN_CAPS)
980 #define matsetvalues4_ MATSETVALUES4
981 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
982 #define matsetvalues4_ matsetvalues4
983 #endif
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatSetValues4_"
987 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
988 {
989   Mat         A  = *AA;
990   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
991   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
992   PetscInt    *ai=a->i,*ailen=a->ilen;
993   PetscInt    *aj=a->j,brow,bcol;
994   PetscInt    ridx,cidx,lastcol = -1;
995   MatScalar   *ap,value,*aa=a->a,*bap;
996 
997   PetscFunctionBegin;
998   for (k=0; k<m; k++) { /* loop over added rows */
999     row  = im[k]; brow = row/4;
1000     rp   = aj + ai[brow];
1001     ap   = aa + 16*ai[brow];
1002     nrow = ailen[brow];
1003     low  = 0;
1004     high = nrow;
1005     for (l=0; l<n; l++) { /* loop over added columns */
1006       col   = in[l]; bcol = col/4;
1007       ridx  = row % 4; cidx = col % 4;
1008       value = v[l + k*n];
1009       if (col <= lastcol)  low = 0;
1010       else                high = nrow;
1011       lastcol = col;
1012       while (high-low > 7) {
1013         t = (low+high)/2;
1014         if (rp[t] > bcol) high = t;
1015         else              low  = t;
1016       }
1017       for (i=low; i<high; i++) {
1018         if (rp[i] > bcol) break;
1019         if (rp[i] == bcol) {
1020           bap   = ap +  16*i + 4*cidx + ridx;
1021           *bap += value;
1022           goto noinsert1;
1023         }
1024       }
1025       N = nrow++ - 1;
1026       high++; /* added new column thus must search to one higher than before */
1027       /* shift up all the later entries in this row */
1028       for (ii=N; ii>=i; ii--) {
1029         rp[ii+1] = rp[ii];
1030         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1031       }
1032       if (N>=i) {
1033         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1034       }
1035       rp[i]                    = bcol;
1036       ap[16*i + 4*cidx + ridx] = value;
1037 noinsert1:;
1038       low = i;
1039     }
1040     ailen[brow] = nrow;
1041   }
1042   PetscFunctionReturnVoid();
1043 }
1044 
1045 /*
1046      Checks for missing diagonals
1047 */
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1050 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1051 {
1052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   PetscInt       *diag,*ii = a->i,i;
1055 
1056   PetscFunctionBegin;
1057   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1058   *missing = PETSC_FALSE;
1059   if (A->rmap->n > 0 && !ii) {
1060     *missing = PETSC_TRUE;
1061     if (d) *d = 0;
1062     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1063   } else {
1064     diag = a->diag;
1065     for (i=0; i<a->mbs; i++) {
1066       if (diag[i] >= ii[i+1]) {
1067         *missing = PETSC_TRUE;
1068         if (d) *d = i;
1069         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1070         break;
1071       }
1072     }
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080 {
1081   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       i,j,m = a->mbs;
1084 
1085   PetscFunctionBegin;
1086   if (!a->diag) {
1087     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1088     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1089     a->free_diag = PETSC_TRUE;
1090   }
1091   for (i=0; i<m; i++) {
1092     a->diag[i] = a->i[i+1];
1093     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094       if (a->j[j] == i) {
1095         a->diag[i] = j;
1096         break;
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1106 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1107 {
1108   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1111   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1112 
1113   PetscFunctionBegin;
1114   *nn = n;
1115   if (!ia) PetscFunctionReturn(0);
1116   if (symmetric) {
1117     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1118     nz   = tia[n];
1119   } else {
1120     tia = a->i; tja = a->j;
1121   }
1122 
1123   if (!blockcompressed && bs > 1) {
1124     (*nn) *= bs;
1125     /* malloc & create the natural set of indices */
1126     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1127     if (n) {
1128       (*ia)[0] = 0;
1129       for (j=1; j<bs; j++) {
1130         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1131       }
1132     }
1133 
1134     for (i=1; i<n; i++) {
1135       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1136       for (j=1; j<bs; j++) {
1137         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1138       }
1139     }
1140     if (n) {
1141       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1142     }
1143 
1144     if (inja) {
1145       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1146       cnt = 0;
1147       for (i=0; i<n; i++) {
1148         for (j=0; j<bs; j++) {
1149           for (k=tia[i]; k<tia[i+1]; k++) {
1150             for (l=0; l<bs; l++) {
1151               (*ja)[cnt++] = bs*tja[k] + l;
1152             }
1153           }
1154         }
1155       }
1156     }
1157 
1158     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1159       ierr = PetscFree(tia);CHKERRQ(ierr);
1160       ierr = PetscFree(tja);CHKERRQ(ierr);
1161     }
1162   } else if (oshift == 1) {
1163     if (symmetric) {
1164       nz = tia[A->rmap->n/bs];
1165       /*  add 1 to i and j indices */
1166       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1167       *ia = tia;
1168       if (ja) {
1169         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1170         *ja = tja;
1171       }
1172     } else {
1173       nz = a->i[A->rmap->n/bs];
1174       /* malloc space and  add 1 to i and j indices */
1175       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1176       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1177       if (ja) {
1178         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1179         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1180       }
1181     }
1182   } else {
1183     *ia = tia;
1184     if (ja) *ja = tja;
1185   }
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1191 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1192 {
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   if (!ia) PetscFunctionReturn(0);
1197   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1198     ierr = PetscFree(*ia);CHKERRQ(ierr);
1199     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1206 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1207 {
1208   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212 #if defined(PETSC_USE_LOG)
1213   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1214 #endif
1215   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1216   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1217   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1218   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1219   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1220   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1221   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1222   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1223   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1224   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1225   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1226   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1227   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1228 
1229   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1230   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1231   ierr = PetscFree(A->data);CHKERRQ(ierr);
1232 
1233   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1249 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255   switch (op) {
1256   case MAT_ROW_ORIENTED:
1257     a->roworiented = flg;
1258     break;
1259   case MAT_KEEP_NONZERO_PATTERN:
1260     a->keepnonzeropattern = flg;
1261     break;
1262   case MAT_NEW_NONZERO_LOCATIONS:
1263     a->nonew = (flg ? 0 : 1);
1264     break;
1265   case MAT_NEW_NONZERO_LOCATION_ERR:
1266     a->nonew = (flg ? -1 : 0);
1267     break;
1268   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1269     a->nonew = (flg ? -2 : 0);
1270     break;
1271   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1272     a->nounused = (flg ? -1 : 0);
1273     break;
1274   case MAT_NEW_DIAGONALS:
1275   case MAT_IGNORE_OFF_PROC_ENTRIES:
1276   case MAT_USE_HASH_TABLE:
1277     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1278     break;
1279   case MAT_SPD:
1280   case MAT_SYMMETRIC:
1281   case MAT_STRUCTURALLY_SYMMETRIC:
1282   case MAT_HERMITIAN:
1283   case MAT_SYMMETRY_ETERNAL:
1284     /* These options are handled directly by MatSetOption() */
1285     break;
1286   default:
1287     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1288   }
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1295 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1296 {
1297   PetscErrorCode ierr;
1298   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1299   MatScalar      *aa_i;
1300   PetscScalar    *v_i;
1301 
1302   PetscFunctionBegin;
1303   bs  = A->rmap->bs;
1304   bs2 = bs*bs;
1305   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1306 
1307   bn  = row/bs;   /* Block number */
1308   bp  = row % bs; /* Block Position */
1309   M   = ai[bn+1] - ai[bn];
1310   *nz = bs*M;
1311 
1312   if (v) {
1313     *v = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         v_i  = *v + i*bs;
1318         aa_i = aa + bs2*(ai[bn] + i);
1319         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1320       }
1321     }
1322   }
1323 
1324   if (idx) {
1325     *idx = 0;
1326     if (*nz) {
1327       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1328       for (i=0; i<M; i++) { /* for each block in the block row */
1329         idx_i = *idx + i*bs;
1330         itmp  = bs*aj[ai[bn] + i];
1331         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1332       }
1333     }
1334   }
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1340 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1352 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1353 {
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1358   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1366 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1367 {
1368   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1369   Mat            C;
1370   PetscErrorCode ierr;
1371   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1372   PetscInt       *rows,*cols,bs2=a->bs2;
1373   MatScalar      *array;
1374 
1375   PetscFunctionBegin;
1376   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1377   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1378     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1379 
1380     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1381     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1382     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1383     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1384     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1385     ierr = PetscFree(col);CHKERRQ(ierr);
1386   } else {
1387     C = *B;
1388   }
1389 
1390   array = a->a;
1391   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1392   for (i=0; i<mbs; i++) {
1393     cols[0] = i*bs;
1394     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1395     len = ai[i+1] - ai[i];
1396     for (j=0; j<len; j++) {
1397       rows[0] = (*aj++)*bs;
1398       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1399       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1400       array += bs2;
1401     }
1402   }
1403   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1404 
1405   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407 
1408   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1409     *B = C;
1410   } else {
1411     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1418 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1419 {
1420   PetscErrorCode ierr;
1421   Mat            Btrans;
1422 
1423   PetscFunctionBegin;
1424   *f   = PETSC_FALSE;
1425   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1426   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1427   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1433 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1434 {
1435   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1438   int            fd;
1439   PetscScalar    *aa;
1440   FILE           *file;
1441 
1442   PetscFunctionBegin;
1443   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1444   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1445   col_lens[0] = MAT_FILE_CLASSID;
1446 
1447   col_lens[1] = A->rmap->N;
1448   col_lens[2] = A->cmap->n;
1449   col_lens[3] = a->nz*bs2;
1450 
1451   /* store lengths of each row and write (including header) to file */
1452   count = 0;
1453   for (i=0; i<a->mbs; i++) {
1454     for (j=0; j<bs; j++) {
1455       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1456     }
1457   }
1458   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1459   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1460 
1461   /* store column indices (zero start index) */
1462   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1463   count = 0;
1464   for (i=0; i<a->mbs; i++) {
1465     for (j=0; j<bs; j++) {
1466       for (k=a->i[i]; k<a->i[i+1]; k++) {
1467         for (l=0; l<bs; l++) {
1468           jj[count++] = bs*a->j[k] + l;
1469         }
1470       }
1471     }
1472   }
1473   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1474   ierr = PetscFree(jj);CHKERRQ(ierr);
1475 
1476   /* store nonzero values */
1477   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1478   count = 0;
1479   for (i=0; i<a->mbs; i++) {
1480     for (j=0; j<bs; j++) {
1481       for (k=a->i[i]; k<a->i[i+1]; k++) {
1482         for (l=0; l<bs; l++) {
1483           aa[count++] = a->a[bs2*k + l*bs + j];
1484         }
1485       }
1486     }
1487   }
1488   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1489   ierr = PetscFree(aa);CHKERRQ(ierr);
1490 
1491   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1492   if (file) {
1493     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1501 {
1502   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode    ierr;
1504   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1505   PetscViewerFormat format;
1506 
1507   PetscFunctionBegin;
1508   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1509   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1510     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1511   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1512     const char *matname;
1513     Mat        aij;
1514     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1515     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1516     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1517     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1518     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520       PetscFunctionReturn(0);
1521   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1523     for (i=0; i<a->mbs; i++) {
1524       for (j=0; j<bs; j++) {
1525         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1526         for (k=a->i[i]; k<a->i[i+1]; k++) {
1527           for (l=0; l<bs; l++) {
1528 #if defined(PETSC_USE_COMPLEX)
1529             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1531                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1532             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1534                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             }
1538 #else
1539             if (a->a[bs2*k + l*bs + j] != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1541             }
1542 #endif
1543           }
1544         }
1545         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1546       }
1547     }
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1549   } else {
1550     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1551     for (i=0; i<a->mbs; i++) {
1552       for (j=0; j<bs; j++) {
1553         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1554         for (k=a->i[i]; k<a->i[i+1]; k++) {
1555           for (l=0; l<bs; l++) {
1556 #if defined(PETSC_USE_COMPLEX)
1557             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1559                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1560             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1562                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             }
1566 #else
1567             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1568 #endif
1569           }
1570         }
1571         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1572       }
1573     }
1574     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1575   }
1576   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #include <petscdraw.h>
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1584 {
1585   Mat               A = (Mat) Aa;
1586   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1587   PetscErrorCode    ierr;
1588   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1589   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1590   MatScalar         *aa;
1591   PetscViewer       viewer;
1592   PetscViewerFormat format;
1593 
1594   PetscFunctionBegin;
1595   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1596   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1597 
1598   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1599 
1600   /* loop over matrix elements drawing boxes */
1601 
1602   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1603     color = PETSC_DRAW_BLUE;
1604     for (i=0,row=0; i<mbs; i++,row+=bs) {
1605       for (j=a->i[i]; j<a->i[i+1]; j++) {
1606         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1607         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1608         aa  = a->a + j*bs2;
1609         for (k=0; k<bs; k++) {
1610           for (l=0; l<bs; l++) {
1611             if (PetscRealPart(*aa++) >=  0.) continue;
1612             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1613           }
1614         }
1615       }
1616     }
1617     color = PETSC_DRAW_CYAN;
1618     for (i=0,row=0; i<mbs; i++,row+=bs) {
1619       for (j=a->i[i]; j<a->i[i+1]; j++) {
1620         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1621         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1622         aa  = a->a + j*bs2;
1623         for (k=0; k<bs; k++) {
1624           for (l=0; l<bs; l++) {
1625             if (PetscRealPart(*aa++) != 0.) continue;
1626             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1627           }
1628         }
1629       }
1630     }
1631     color = PETSC_DRAW_RED;
1632     for (i=0,row=0; i<mbs; i++,row+=bs) {
1633       for (j=a->i[i]; j<a->i[i+1]; j++) {
1634         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1635         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1636         aa  = a->a + j*bs2;
1637         for (k=0; k<bs; k++) {
1638           for (l=0; l<bs; l++) {
1639             if (PetscRealPart(*aa++) <= 0.) continue;
1640             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1641           }
1642         }
1643       }
1644     }
1645   } else {
1646     /* use contour shading to indicate magnitude of values */
1647     /* first determine max of all nonzero values */
1648     PetscDraw popup;
1649     PetscReal scale,maxv = 0.0;
1650 
1651     for (i=0; i<a->nz*a->bs2; i++) {
1652       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1653     }
1654     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1655     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1656     if (popup) {
1657       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1658     }
1659     for (i=0,row=0; i<mbs; i++,row+=bs) {
1660       for (j=a->i[i]; j<a->i[i+1]; j++) {
1661         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663         aa  = a->a + j*bs2;
1664         for (k=0; k<bs; k++) {
1665           for (l=0; l<bs; l++) {
1666             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1667             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1668           }
1669         }
1670       }
1671     }
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679 {
1680   PetscErrorCode ierr;
1681   PetscReal      xl,yl,xr,yr,w,h;
1682   PetscDraw      draw;
1683   PetscBool      isnull;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1687   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1688 
1689   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1690   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1691   xr  += w;    yr += h;  xl = -w;     yl = -h;
1692   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1693   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1694   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatView_SeqBAIJ"
1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1701 {
1702   PetscErrorCode ierr;
1703   PetscBool      iascii,isbinary,isdraw;
1704 
1705   PetscFunctionBegin;
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1709   if (iascii) {
1710     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1711   } else if (isbinary) {
1712     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1713   } else if (isdraw) {
1714     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1715   } else {
1716     Mat B;
1717     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1718     ierr = MatView(B,viewer);CHKERRQ(ierr);
1719     ierr = MatDestroy(&B);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1727 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1728 {
1729   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1730   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1731   PetscInt    *ai = a->i,*ailen = a->ilen;
1732   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1733   MatScalar   *ap,*aa = a->a;
1734 
1735   PetscFunctionBegin;
1736   for (k=0; k<m; k++) { /* loop over rows */
1737     row = im[k]; brow = row/bs;
1738     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1739     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1740     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1741     nrow = ailen[brow];
1742     for (l=0; l<n; l++) { /* loop over columns */
1743       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1744       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1745       col  = in[l];
1746       bcol = col/bs;
1747       cidx = col%bs;
1748       ridx = row%bs;
1749       high = nrow;
1750       low  = 0; /* assume unsorted */
1751       while (high-low > 5) {
1752         t = (low+high)/2;
1753         if (rp[t] > bcol) high = t;
1754         else             low  = t;
1755       }
1756       for (i=low; i<high; i++) {
1757         if (rp[i] > bcol) break;
1758         if (rp[i] == bcol) {
1759           *v++ = ap[bs2*i+bs*cidx+ridx];
1760           goto finished;
1761         }
1762       }
1763       *v++ = 0.0;
1764 finished:;
1765     }
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1772 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1773 {
1774   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1775   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1776   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1777   PetscErrorCode    ierr;
1778   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1779   PetscBool         roworiented=a->roworiented;
1780   const PetscScalar *value     = v;
1781   MatScalar         *ap,*aa = a->a,*bap;
1782 
1783   PetscFunctionBegin;
1784   if (roworiented) {
1785     stepval = (n-1)*bs;
1786   } else {
1787     stepval = (m-1)*bs;
1788   }
1789   for (k=0; k<m; k++) { /* loop over added rows */
1790     row = im[k];
1791     if (row < 0) continue;
1792 #if defined(PETSC_USE_DEBUG)
1793     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1794 #endif
1795     rp   = aj + ai[row];
1796     ap   = aa + bs2*ai[row];
1797     rmax = imax[row];
1798     nrow = ailen[row];
1799     low  = 0;
1800     high = nrow;
1801     for (l=0; l<n; l++) { /* loop over added columns */
1802       if (in[l] < 0) continue;
1803 #if defined(PETSC_USE_DEBUG)
1804       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1805 #endif
1806       col = in[l];
1807       if (roworiented) {
1808         value = v + (k*(stepval+bs) + l)*bs;
1809       } else {
1810         value = v + (l*(stepval+bs) + k)*bs;
1811       }
1812       if (col <= lastcol) low = 0;
1813       else high = nrow;
1814       lastcol = col;
1815       while (high-low > 7) {
1816         t = (low+high)/2;
1817         if (rp[t] > col) high = t;
1818         else             low  = t;
1819       }
1820       for (i=low; i<high; i++) {
1821         if (rp[i] > col) break;
1822         if (rp[i] == col) {
1823           bap = ap +  bs2*i;
1824           if (roworiented) {
1825             if (is == ADD_VALUES) {
1826               for (ii=0; ii<bs; ii++,value+=stepval) {
1827                 for (jj=ii; jj<bs2; jj+=bs) {
1828                   bap[jj] += *value++;
1829                 }
1830               }
1831             } else {
1832               for (ii=0; ii<bs; ii++,value+=stepval) {
1833                 for (jj=ii; jj<bs2; jj+=bs) {
1834                   bap[jj] = *value++;
1835                 }
1836               }
1837             }
1838           } else {
1839             if (is == ADD_VALUES) {
1840               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1841                 for (jj=0; jj<bs; jj++) {
1842                   bap[jj] += value[jj];
1843                 }
1844                 bap += bs;
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   bap[jj]  = value[jj];
1850                 }
1851                 bap += bs;
1852               }
1853             }
1854           }
1855           goto noinsert2;
1856         }
1857       }
1858       if (nonew == 1) goto noinsert2;
1859       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1860       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1861       N = nrow++ - 1; high++;
1862       /* shift up all the later entries in this row */
1863       for (ii=N; ii>=i; ii--) {
1864         rp[ii+1] = rp[ii];
1865         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1866       }
1867       if (N >= i) {
1868         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1869       }
1870       rp[i] = col;
1871       bap   = ap +  bs2*i;
1872       if (roworiented) {
1873         for (ii=0; ii<bs; ii++,value+=stepval) {
1874           for (jj=ii; jj<bs2; jj+=bs) {
1875             bap[jj] = *value++;
1876           }
1877         }
1878       } else {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=0; jj<bs; jj++) {
1881             *bap++ = *value++;
1882           }
1883         }
1884       }
1885 noinsert2:;
1886       low = i;
1887     }
1888     ailen[row] = nrow;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1896 {
1897   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1898   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1899   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1900   PetscErrorCode ierr;
1901   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1902   MatScalar      *aa  = a->a,*ap;
1903   PetscReal      ratio=0.6;
1904 
1905   PetscFunctionBegin;
1906   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1907 
1908   if (m) rmax = ailen[0];
1909   for (i=1; i<mbs; i++) {
1910     /* move each row back by the amount of empty slots (fshift) before it*/
1911     fshift += imax[i-1] - ailen[i-1];
1912     rmax    = PetscMax(rmax,ailen[i]);
1913     if (fshift) {
1914       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1915       N  = ailen[i];
1916       for (j=0; j<N; j++) {
1917         ip[j-fshift] = ip[j];
1918 
1919         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1920       }
1921     }
1922     ai[i] = ai[i-1] + ailen[i-1];
1923   }
1924   if (mbs) {
1925     fshift += imax[mbs-1] - ailen[mbs-1];
1926     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1927   }
1928 
1929   /* reset ilen and imax for each row */
1930   a->nonzerorowcnt = 0;
1931   for (i=0; i<mbs; i++) {
1932     ailen[i] = imax[i] = ai[i+1] - ai[i];
1933     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1934   }
1935   a->nz = ai[mbs];
1936 
1937   /* diagonals may have moved, so kill the diagonal pointers */
1938   a->idiagvalid = PETSC_FALSE;
1939   if (fshift && a->diag) {
1940     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1941     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1942     a->diag = 0;
1943   }
1944   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1945   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1946   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1947   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1948 
1949   A->info.mallocs    += a->reallocs;
1950   a->reallocs         = 0;
1951   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1952   a->rmax             = rmax;
1953 
1954   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 /*
1959    This function returns an array of flags which indicate the locations of contiguous
1960    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1961    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1962    Assume: sizes should be long enough to hold all the values.
1963 */
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1967 {
1968   PetscInt  i,j,k,row;
1969   PetscBool flg;
1970 
1971   PetscFunctionBegin;
1972   for (i=0,j=0; i<n; j++) {
1973     row = idx[i];
1974     if (row%bs!=0) { /* Not the begining of a block */
1975       sizes[j] = 1;
1976       i++;
1977     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1978       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1979       i++;
1980     } else { /* Begining of the block, so check if the complete block exists */
1981       flg = PETSC_TRUE;
1982       for (k=1; k<bs; k++) {
1983         if (row+k != idx[i+k]) { /* break in the block */
1984           flg = PETSC_FALSE;
1985           break;
1986         }
1987       }
1988       if (flg) { /* No break in the bs */
1989         sizes[j] = bs;
1990         i       += bs;
1991       } else {
1992         sizes[j] = 1;
1993         i++;
1994       }
1995     }
1996   }
1997   *bs_max = j;
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2004 {
2005   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2006   PetscErrorCode    ierr;
2007   PetscInt          i,j,k,count,*rows;
2008   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2009   PetscScalar       zero = 0.0;
2010   MatScalar         *aa;
2011   const PetscScalar *xx;
2012   PetscScalar       *bb;
2013 
2014   PetscFunctionBegin;
2015   /* fix right hand side if needed */
2016   if (x && b) {
2017     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2018     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2019     for (i=0; i<is_n; i++) {
2020       bb[is_idx[i]] = diag*xx[is_idx[i]];
2021     }
2022     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2023     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2024   }
2025 
2026   /* Make a copy of the IS and  sort it */
2027   /* allocate memory for rows,sizes */
2028   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2029 
2030   /* copy IS values to rows, and sort them */
2031   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2032   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2033 
2034   if (baij->keepnonzeropattern) {
2035     for (i=0; i<is_n; i++) sizes[i] = 1;
2036     bs_max          = is_n;
2037   } else {
2038     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2039     A->nonzerostate++;
2040   }
2041 
2042   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2043     row = rows[j];
2044     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2045     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2046     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2047     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2048       if (diag != (PetscScalar)0.0) {
2049         if (baij->ilen[row/bs] > 0) {
2050           baij->ilen[row/bs]       = 1;
2051           baij->j[baij->i[row/bs]] = row/bs;
2052 
2053           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2054         }
2055         /* Now insert all the diagonal values for this bs */
2056         for (k=0; k<bs; k++) {
2057           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2058         }
2059       } else { /* (diag == 0.0) */
2060         baij->ilen[row/bs] = 0;
2061       } /* end (diag == 0.0) */
2062     } else { /* (sizes[i] != bs) */
2063 #if defined(PETSC_USE_DEBUG)
2064       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2065 #endif
2066       for (k=0; k<count; k++) {
2067         aa[0] =  zero;
2068         aa   += bs;
2069       }
2070       if (diag != (PetscScalar)0.0) {
2071         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072       }
2073     }
2074   }
2075 
2076   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2077   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2083 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2084 {
2085   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2086   PetscErrorCode    ierr;
2087   PetscInt          i,j,k,count;
2088   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2089   PetscScalar       zero = 0.0;
2090   MatScalar         *aa;
2091   const PetscScalar *xx;
2092   PetscScalar       *bb;
2093   PetscBool         *zeroed,vecs = PETSC_FALSE;
2094 
2095   PetscFunctionBegin;
2096   /* fix right hand side if needed */
2097   if (x && b) {
2098     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2099     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2100     vecs = PETSC_TRUE;
2101   }
2102 
2103   /* zero the columns */
2104   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2105   for (i=0; i<is_n; i++) {
2106     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2107     zeroed[is_idx[i]] = PETSC_TRUE;
2108   }
2109   for (i=0; i<A->rmap->N; i++) {
2110     if (!zeroed[i]) {
2111       row = i/bs;
2112       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2113         for (k=0; k<bs; k++) {
2114           col = bs*baij->j[j] + k;
2115           if (zeroed[col]) {
2116             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2117             if (vecs) bb[i] -= aa[0]*xx[col];
2118             aa[0] = 0.0;
2119           }
2120         }
2121       }
2122     } else if (vecs) bb[i] = diag*xx[i];
2123   }
2124   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2125   if (vecs) {
2126     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2127     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2128   }
2129 
2130   /* zero the rows */
2131   for (i=0; i<is_n; i++) {
2132     row   = is_idx[i];
2133     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2134     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2135     for (k=0; k<count; k++) {
2136       aa[0] =  zero;
2137       aa   += bs;
2138     }
2139     if (diag != (PetscScalar)0.0) {
2140       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2141     }
2142   }
2143   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2149 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2150 {
2151   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2152   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2153   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2154   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2155   PetscErrorCode ierr;
2156   PetscInt       ridx,cidx,bs2=a->bs2;
2157   PetscBool      roworiented=a->roworiented;
2158   MatScalar      *ap,value,*aa=a->a,*bap;
2159 
2160   PetscFunctionBegin;
2161   for (k=0; k<m; k++) { /* loop over added rows */
2162     row  = im[k];
2163     brow = row/bs;
2164     if (row < 0) continue;
2165 #if defined(PETSC_USE_DEBUG)
2166     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2167 #endif
2168     rp   = aj + ai[brow];
2169     ap   = aa + bs2*ai[brow];
2170     rmax = imax[brow];
2171     nrow = ailen[brow];
2172     low  = 0;
2173     high = nrow;
2174     for (l=0; l<n; l++) { /* loop over added columns */
2175       if (in[l] < 0) continue;
2176 #if defined(PETSC_USE_DEBUG)
2177       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2178 #endif
2179       col  = in[l]; bcol = col/bs;
2180       ridx = row % bs; cidx = col % bs;
2181       if (roworiented) {
2182         value = v[l + k*n];
2183       } else {
2184         value = v[k + l*m];
2185       }
2186       if (col <= lastcol) low = 0; else high = nrow;
2187       lastcol = col;
2188       while (high-low > 7) {
2189         t = (low+high)/2;
2190         if (rp[t] > bcol) high = t;
2191         else              low  = t;
2192       }
2193       for (i=low; i<high; i++) {
2194         if (rp[i] > bcol) break;
2195         if (rp[i] == bcol) {
2196           bap = ap +  bs2*i + bs*cidx + ridx;
2197           if (is == ADD_VALUES) *bap += value;
2198           else                  *bap  = value;
2199           goto noinsert1;
2200         }
2201       }
2202       if (nonew == 1) goto noinsert1;
2203       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2204       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2205       N = nrow++ - 1; high++;
2206       /* shift up all the later entries in this row */
2207       for (ii=N; ii>=i; ii--) {
2208         rp[ii+1] = rp[ii];
2209         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2210       }
2211       if (N>=i) {
2212         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2213       }
2214       rp[i]                      = bcol;
2215       ap[bs2*i + bs*cidx + ridx] = value;
2216       a->nz++;
2217       A->nonzerostate++;
2218 noinsert1:;
2219       low = i;
2220     }
2221     ailen[brow] = nrow;
2222   }
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2228 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2229 {
2230   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2231   Mat            outA;
2232   PetscErrorCode ierr;
2233   PetscBool      row_identity,col_identity;
2234 
2235   PetscFunctionBegin;
2236   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2237   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2238   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2239   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2240 
2241   outA            = inA;
2242   inA->factortype = MAT_FACTOR_LU;
2243 
2244   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2245 
2246   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2247   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2248   a->row = row;
2249   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2250   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2251   a->col = col;
2252 
2253   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2254   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2255   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2256   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2257 
2258   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2259   if (!a->solve_work) {
2260     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2261     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2262   }
2263   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2269 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2270 {
2271   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2272   PetscInt    i,nz,mbs;
2273 
2274   PetscFunctionBegin;
2275   nz  = baij->maxnz;
2276   mbs = baij->mbs;
2277   for (i=0; i<nz; i++) {
2278     baij->j[i] = indices[i];
2279   }
2280   baij->nz = nz;
2281   for (i=0; i<mbs; i++) {
2282     baij->ilen[i] = baij->imax[i];
2283   }
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2289 /*@
2290     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2291        in the matrix.
2292 
2293   Input Parameters:
2294 +  mat - the SeqBAIJ matrix
2295 -  indices - the column indices
2296 
2297   Level: advanced
2298 
2299   Notes:
2300     This can be called if you have precomputed the nonzero structure of the
2301   matrix and want to provide it to the matrix object to improve the performance
2302   of the MatSetValues() operation.
2303 
2304     You MUST have set the correct numbers of nonzeros per row in the call to
2305   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2306 
2307     MUST be called before any calls to MatSetValues();
2308 
2309 @*/
2310 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2311 {
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2316   PetscValidPointer(indices,2);
2317   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2328   PetscReal      atmp;
2329   PetscScalar    *x,zero = 0.0;
2330   MatScalar      *aa;
2331   PetscInt       ncols,brow,krow,kcol;
2332 
2333   PetscFunctionBegin;
2334   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2335   bs  = A->rmap->bs;
2336   aa  = a->a;
2337   ai  = a->i;
2338   aj  = a->j;
2339   mbs = a->mbs;
2340 
2341   ierr = VecSet(v,zero);CHKERRQ(ierr);
2342   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2343   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2344   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2345   for (i=0; i<mbs; i++) {
2346     ncols = ai[1] - ai[0]; ai++;
2347     brow  = bs*i;
2348     for (j=0; j<ncols; j++) {
2349       for (kcol=0; kcol<bs; kcol++) {
2350         for (krow=0; krow<bs; krow++) {
2351           atmp = PetscAbsScalar(*aa);aa++;
2352           row  = brow + krow;   /* row index */
2353           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2354         }
2355       }
2356       aj++;
2357     }
2358   }
2359   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatCopy_SeqBAIJ"
2365 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2366 {
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   /* If the two matrices have the same copy implementation, use fast copy. */
2371   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2372     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2373     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2374     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2375 
2376     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2377     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2378     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2379   } else {
2380     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2387 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2388 {
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2398 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2399 {
2400   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2401 
2402   PetscFunctionBegin;
2403   *array = a->a;
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2409 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2410 {
2411   PetscFunctionBegin;
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2417 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2418 {
2419   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2420   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2421   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   /* Set the number of nonzeros in the new matrix */
2426   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2432 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2433 {
2434   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2435   PetscErrorCode ierr;
2436   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2437   PetscBLASInt   one=1;
2438 
2439   PetscFunctionBegin;
2440   if (str == SAME_NONZERO_PATTERN) {
2441     PetscScalar  alpha = a;
2442     PetscBLASInt bnz;
2443     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2444     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2445     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2446   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2447     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2448   } else {
2449     Mat      B;
2450     PetscInt *nnz;
2451     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2452     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2453     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2454     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2455     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2456     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2457     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2458     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2459     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2460     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2461     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2462     ierr = PetscFree(nnz);CHKERRQ(ierr);
2463   }
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2469 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2482 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2483 {
2484   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2485   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2486   MatScalar   *aa = a->a;
2487 
2488   PetscFunctionBegin;
2489   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2495 /*
2496     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2497 */
2498 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2499 {
2500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2501   PetscErrorCode ierr;
2502   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2503   PetscInt       nz = a->i[m],row,*jj,mr,col;
2504 
2505   PetscFunctionBegin;
2506   *nn = n;
2507   if (!ia) PetscFunctionReturn(0);
2508   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2509   else {
2510     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2511     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2512     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2513     jj   = a->j;
2514     for (i=0; i<nz; i++) {
2515       collengths[jj[i]]++;
2516     }
2517     cia[0] = oshift;
2518     for (i=0; i<n; i++) {
2519       cia[i+1] = cia[i] + collengths[i];
2520     }
2521     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2522     jj   = a->j;
2523     for (row=0; row<m; row++) {
2524       mr = a->i[row+1] - a->i[row];
2525       for (i=0; i<mr; i++) {
2526         col = *jj++;
2527 
2528         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2529       }
2530     }
2531     ierr = PetscFree(collengths);CHKERRQ(ierr);
2532     *ia  = cia; *ja = cja;
2533   }
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2539 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2540 {
2541   PetscErrorCode ierr;
2542 
2543   PetscFunctionBegin;
2544   if (!ia) PetscFunctionReturn(0);
2545   ierr = PetscFree(*ia);CHKERRQ(ierr);
2546   ierr = PetscFree(*ja);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 /*
2551  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2552  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2553  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2554  */
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2557 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2558 {
2559   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2560   PetscErrorCode ierr;
2561   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2562   PetscInt       nz = a->i[m],row,*jj,mr,col;
2563   PetscInt       *cspidx;
2564 
2565   PetscFunctionBegin;
2566   *nn = n;
2567   if (!ia) PetscFunctionReturn(0);
2568 
2569   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2570   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2571   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2572   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2573   jj   = a->j;
2574   for (i=0; i<nz; i++) {
2575     collengths[jj[i]]++;
2576   }
2577   cia[0] = oshift;
2578   for (i=0; i<n; i++) {
2579     cia[i+1] = cia[i] + collengths[i];
2580   }
2581   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2582   jj   = a->j;
2583   for (row=0; row<m; row++) {
2584     mr = a->i[row+1] - a->i[row];
2585     for (i=0; i<mr; i++) {
2586       col = *jj++;
2587       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2588       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2589     }
2590   }
2591   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2592   *ia    = cia; *ja = cja;
2593   *spidx = cspidx;
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2599 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2600 {
2601   PetscErrorCode ierr;
2602 
2603   PetscFunctionBegin;
2604   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2605   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 #undef __FUNCT__
2610 #define __FUNCT__ "MatShift_SeqBAIJ"
2611 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2612 {
2613   PetscErrorCode ierr;
2614   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2615 
2616   PetscFunctionBegin;
2617   if (!Y->preallocated || !aij->nz) {
2618     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2619   }
2620   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 /* -------------------------------------------------------------------*/
2625 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2626                                        MatGetRow_SeqBAIJ,
2627                                        MatRestoreRow_SeqBAIJ,
2628                                        MatMult_SeqBAIJ_N,
2629                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2630                                        MatMultTranspose_SeqBAIJ,
2631                                        MatMultTransposeAdd_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                /* 10*/ 0,
2636                                        MatLUFactor_SeqBAIJ,
2637                                        0,
2638                                        0,
2639                                        MatTranspose_SeqBAIJ,
2640                                /* 15*/ MatGetInfo_SeqBAIJ,
2641                                        MatEqual_SeqBAIJ,
2642                                        MatGetDiagonal_SeqBAIJ,
2643                                        MatDiagonalScale_SeqBAIJ,
2644                                        MatNorm_SeqBAIJ,
2645                                /* 20*/ 0,
2646                                        MatAssemblyEnd_SeqBAIJ,
2647                                        MatSetOption_SeqBAIJ,
2648                                        MatZeroEntries_SeqBAIJ,
2649                                /* 24*/ MatZeroRows_SeqBAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                /* 29*/ MatSetUp_SeqBAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                /* 34*/ MatDuplicate_SeqBAIJ,
2660                                        0,
2661                                        0,
2662                                        MatILUFactor_SeqBAIJ,
2663                                        0,
2664                                /* 39*/ MatAXPY_SeqBAIJ,
2665                                        MatGetSubMatrices_SeqBAIJ,
2666                                        MatIncreaseOverlap_SeqBAIJ,
2667                                        MatGetValues_SeqBAIJ,
2668                                        MatCopy_SeqBAIJ,
2669                                /* 44*/ 0,
2670                                        MatScale_SeqBAIJ,
2671                                        MatShift_SeqBAIJ,
2672                                        0,
2673                                        MatZeroRowsColumns_SeqBAIJ,
2674                                /* 49*/ 0,
2675                                        MatGetRowIJ_SeqBAIJ,
2676                                        MatRestoreRowIJ_SeqBAIJ,
2677                                        MatGetColumnIJ_SeqBAIJ,
2678                                        MatRestoreColumnIJ_SeqBAIJ,
2679                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatSetValuesBlocked_SeqBAIJ,
2684                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2685                                        MatDestroy_SeqBAIJ,
2686                                        MatView_SeqBAIJ,
2687                                        0,
2688                                        0,
2689                                /* 64*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2695                                        0,
2696                                        MatConvert_Basic,
2697                                        0,
2698                                        0,
2699                                /* 74*/ 0,
2700                                        MatFDColoringApply_BAIJ,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                /* 79*/ 0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        MatLoad_SeqBAIJ,
2709                                /* 84*/ 0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                /* 89*/ 0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                /* 94*/ 0,
2720                                        0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                /* 99*/ 0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                /*104*/ 0,
2730                                        MatRealPart_SeqBAIJ,
2731                                        MatImaginaryPart_SeqBAIJ,
2732                                        0,
2733                                        0,
2734                                /*109*/ 0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                        MatMissingDiagonal_SeqBAIJ,
2739                                /*114*/ 0,
2740                                        0,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                /*119*/ 0,
2745                                        0,
2746                                        MatMultHermitianTranspose_SeqBAIJ,
2747                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2748                                        0,
2749                                /*124*/ 0,
2750                                        0,
2751                                        MatInvertBlockDiagonal_SeqBAIJ,
2752                                        0,
2753                                        0,
2754                                /*129*/ 0,
2755                                        0,
2756                                        0,
2757                                        0,
2758                                        0,
2759                                /*134*/ 0,
2760                                        0,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                /*139*/ 0,
2765                                        0,
2766                                        0,
2767                                        MatFDColoringSetUp_SeqXAIJ,
2768                                        0,
2769                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2770 };
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2774 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2775 {
2776   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2777   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2782 
2783   /* allocate space for values if not already there */
2784   if (!aij->saved_values) {
2785     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2786     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2787   }
2788 
2789   /* copy values over */
2790   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2796 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2797 {
2798   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2799   PetscErrorCode ierr;
2800   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2801 
2802   PetscFunctionBegin;
2803   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2804   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2805 
2806   /* copy values over */
2807   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2812 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2816 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2817 {
2818   Mat_SeqBAIJ    *b;
2819   PetscErrorCode ierr;
2820   PetscInt       i,mbs,nbs,bs2;
2821   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2822 
2823   PetscFunctionBegin;
2824   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2825   if (nz == MAT_SKIP_ALLOCATION) {
2826     skipallocation = PETSC_TRUE;
2827     nz             = 0;
2828   }
2829 
2830   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2831   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2834 
2835   B->preallocated = PETSC_TRUE;
2836 
2837   mbs = B->rmap->n/bs;
2838   nbs = B->cmap->n/bs;
2839   bs2 = bs*bs;
2840 
2841   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2842 
2843   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2844   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2845   if (nnz) {
2846     for (i=0; i<mbs; i++) {
2847       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2848       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2849     }
2850   }
2851 
2852   b    = (Mat_SeqBAIJ*)B->data;
2853   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2854   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2855   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2856 
2857   if (!flg) {
2858     switch (bs) {
2859     case 1:
2860       B->ops->mult    = MatMult_SeqBAIJ_1;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2862       break;
2863     case 2:
2864       B->ops->mult    = MatMult_SeqBAIJ_2;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2866       break;
2867     case 3:
2868       B->ops->mult    = MatMult_SeqBAIJ_3;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2870       break;
2871     case 4:
2872       B->ops->mult    = MatMult_SeqBAIJ_4;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2874       break;
2875     case 5:
2876       B->ops->mult    = MatMult_SeqBAIJ_5;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2878       break;
2879     case 6:
2880       B->ops->mult    = MatMult_SeqBAIJ_6;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2882       break;
2883     case 7:
2884       B->ops->mult    = MatMult_SeqBAIJ_7;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2886       break;
2887     case 15:
2888       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2889       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2890       break;
2891     default:
2892       B->ops->mult    = MatMult_SeqBAIJ_N;
2893       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2894       break;
2895     }
2896   }
2897   B->ops->sor = MatSOR_SeqBAIJ;
2898   b->mbs = mbs;
2899   b->nbs = nbs;
2900   if (!skipallocation) {
2901     if (!b->imax) {
2902       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2903       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2904 
2905       b->free_imax_ilen = PETSC_TRUE;
2906     }
2907     /* b->ilen will count nonzeros in each block row so far. */
2908     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2909     if (!nnz) {
2910       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2911       else if (nz < 0) nz = 1;
2912       for (i=0; i<mbs; i++) b->imax[i] = nz;
2913       nz = nz*mbs;
2914     } else {
2915       nz = 0;
2916       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2917     }
2918 
2919     /* allocate the matrix space */
2920     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2921     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2922     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2923     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2924     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2925 
2926     b->singlemalloc = PETSC_TRUE;
2927     b->i[0]         = 0;
2928     for (i=1; i<mbs+1; i++) {
2929       b->i[i] = b->i[i-1] + b->imax[i-1];
2930     }
2931     b->free_a  = PETSC_TRUE;
2932     b->free_ij = PETSC_TRUE;
2933   } else {
2934     b->free_a  = PETSC_FALSE;
2935     b->free_ij = PETSC_FALSE;
2936   }
2937 
2938   b->bs2              = bs2;
2939   b->mbs              = mbs;
2940   b->nz               = 0;
2941   b->maxnz            = nz;
2942   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2943   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2949 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2950 {
2951   PetscInt       i,m,nz,nz_max=0,*nnz;
2952   PetscScalar    *values=0;
2953   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2958   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2961   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2962   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2963   m    = B->rmap->n/bs;
2964 
2965   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2966   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2967   for (i=0; i<m; i++) {
2968     nz = ii[i+1]- ii[i];
2969     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2970     nz_max = PetscMax(nz_max, nz);
2971     nnz[i] = nz;
2972   }
2973   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2974   ierr = PetscFree(nnz);CHKERRQ(ierr);
2975 
2976   values = (PetscScalar*)V;
2977   if (!values) {
2978     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2979   }
2980   for (i=0; i<m; i++) {
2981     PetscInt          ncols  = ii[i+1] - ii[i];
2982     const PetscInt    *icols = jj + ii[i];
2983     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2984     if (!roworiented) {
2985       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2986     } else {
2987       PetscInt j;
2988       for (j=0; j<ncols; j++) {
2989         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2990         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2991       }
2992     }
2993   }
2994   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 /*MC
3002    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3003    block sparse compressed row format.
3004 
3005    Options Database Keys:
3006 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3007 
3008   Level: beginner
3009 
3010 .seealso: MatCreateSeqBAIJ()
3011 M*/
3012 
3013 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatCreate_SeqBAIJ"
3017 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3018 {
3019   PetscErrorCode ierr;
3020   PetscMPIInt    size;
3021   Mat_SeqBAIJ    *b;
3022 
3023   PetscFunctionBegin;
3024   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3025   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3026 
3027   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3028   B->data = (void*)b;
3029   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3030 
3031   b->row          = 0;
3032   b->col          = 0;
3033   b->icol         = 0;
3034   b->reallocs     = 0;
3035   b->saved_values = 0;
3036 
3037   b->roworiented        = PETSC_TRUE;
3038   b->nonew              = 0;
3039   b->diag               = 0;
3040   B->spptr              = 0;
3041   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3042   b->keepnonzeropattern = PETSC_FALSE;
3043 
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 #undef __FUNCT__
3059 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3060 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3061 {
3062   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3063   PetscErrorCode ierr;
3064   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3065 
3066   PetscFunctionBegin;
3067   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3068 
3069   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3070     c->imax           = a->imax;
3071     c->ilen           = a->ilen;
3072     c->free_imax_ilen = PETSC_FALSE;
3073   } else {
3074     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3075     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3076     for (i=0; i<mbs; i++) {
3077       c->imax[i] = a->imax[i];
3078       c->ilen[i] = a->ilen[i];
3079     }
3080     c->free_imax_ilen = PETSC_TRUE;
3081   }
3082 
3083   /* allocate the matrix space */
3084   if (mallocmatspace) {
3085     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3086       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3087       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3088 
3089       c->i            = a->i;
3090       c->j            = a->j;
3091       c->singlemalloc = PETSC_FALSE;
3092       c->free_a       = PETSC_TRUE;
3093       c->free_ij      = PETSC_FALSE;
3094       c->parent       = A;
3095       C->preallocated = PETSC_TRUE;
3096       C->assembled    = PETSC_TRUE;
3097 
3098       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3099       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3100       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3101     } else {
3102       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3103       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3104 
3105       c->singlemalloc = PETSC_TRUE;
3106       c->free_a       = PETSC_TRUE;
3107       c->free_ij      = PETSC_TRUE;
3108 
3109       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3110       if (mbs > 0) {
3111         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3112         if (cpvalues == MAT_COPY_VALUES) {
3113           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3114         } else {
3115           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3116         }
3117       }
3118       C->preallocated = PETSC_TRUE;
3119       C->assembled    = PETSC_TRUE;
3120     }
3121   }
3122 
3123   c->roworiented = a->roworiented;
3124   c->nonew       = a->nonew;
3125 
3126   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3127   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3128 
3129   c->bs2         = a->bs2;
3130   c->mbs         = a->mbs;
3131   c->nbs         = a->nbs;
3132 
3133   if (a->diag) {
3134     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3135       c->diag      = a->diag;
3136       c->free_diag = PETSC_FALSE;
3137     } else {
3138       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3139       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3140       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3141       c->free_diag = PETSC_TRUE;
3142     }
3143   } else c->diag = 0;
3144 
3145   c->nz         = a->nz;
3146   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3147   c->solve_work = NULL;
3148   c->mult_work  = NULL;
3149   c->sor_workt  = NULL;
3150   c->sor_work   = NULL;
3151 
3152   c->compressedrow.use   = a->compressedrow.use;
3153   c->compressedrow.nrows = a->compressedrow.nrows;
3154   if (a->compressedrow.use) {
3155     i    = a->compressedrow.nrows;
3156     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3157     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3158     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3159     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3160   } else {
3161     c->compressedrow.use    = PETSC_FALSE;
3162     c->compressedrow.i      = NULL;
3163     c->compressedrow.rindex = NULL;
3164   }
3165   C->nonzerostate = A->nonzerostate;
3166 
3167   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3168   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3169   PetscFunctionReturn(0);
3170 }
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3174 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3175 {
3176   PetscErrorCode ierr;
3177 
3178   PetscFunctionBegin;
3179   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3180   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3181   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3182   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3183   PetscFunctionReturn(0);
3184 }
3185 
3186 #undef __FUNCT__
3187 #define __FUNCT__ "MatLoad_SeqBAIJ"
3188 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3189 {
3190   Mat_SeqBAIJ    *a;
3191   PetscErrorCode ierr;
3192   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3193   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3194   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3195   PetscInt       *masked,nmask,tmp,bs2,ishift;
3196   PetscMPIInt    size;
3197   int            fd;
3198   PetscScalar    *aa;
3199   MPI_Comm       comm;
3200 
3201   PetscFunctionBegin;
3202   /* force binary viewer to load .info file if it has not yet done so */
3203   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3204   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3205   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3206   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3207   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3208   if (bs < 0) bs = 1;
3209   bs2  = bs*bs;
3210 
3211   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3212   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3213   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3214   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3215   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3216   M = header[1]; N = header[2]; nz = header[3];
3217 
3218   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3219   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3220 
3221   /*
3222      This code adds extra rows to make sure the number of rows is
3223     divisible by the blocksize
3224   */
3225   mbs        = M/bs;
3226   extra_rows = bs - M + bs*(mbs);
3227   if (extra_rows == bs) extra_rows = 0;
3228   else mbs++;
3229   if (extra_rows) {
3230     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3231   }
3232 
3233   /* Set global sizes if not already set */
3234   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3235     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3236   } else { /* Check if the matrix global sizes are correct */
3237     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3238     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3239       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3240     }
3241     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3242   }
3243 
3244   /* read in row lengths */
3245   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3246   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3247   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3248 
3249   /* read in column indices */
3250   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3251   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3252   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3253 
3254   /* loop over row lengths determining block row lengths */
3255   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3256   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3257   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3258   rowcount = 0;
3259   nzcount  = 0;
3260   for (i=0; i<mbs; i++) {
3261     nmask = 0;
3262     for (j=0; j<bs; j++) {
3263       kmax = rowlengths[rowcount];
3264       for (k=0; k<kmax; k++) {
3265         tmp = jj[nzcount++]/bs;
3266         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3267       }
3268       rowcount++;
3269     }
3270     browlengths[i] += nmask;
3271     /* zero out the mask elements we set */
3272     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3273   }
3274 
3275   /* Do preallocation  */
3276   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3277   a    = (Mat_SeqBAIJ*)newmat->data;
3278 
3279   /* set matrix "i" values */
3280   a->i[0] = 0;
3281   for (i=1; i<= mbs; i++) {
3282     a->i[i]      = a->i[i-1] + browlengths[i-1];
3283     a->ilen[i-1] = browlengths[i-1];
3284   }
3285   a->nz = 0;
3286   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3287 
3288   /* read in nonzero values */
3289   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3290   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3291   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3292 
3293   /* set "a" and "j" values into matrix */
3294   nzcount = 0; jcount = 0;
3295   for (i=0; i<mbs; i++) {
3296     nzcountb = nzcount;
3297     nmask    = 0;
3298     for (j=0; j<bs; j++) {
3299       kmax = rowlengths[i*bs+j];
3300       for (k=0; k<kmax; k++) {
3301         tmp = jj[nzcount++]/bs;
3302         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3303       }
3304     }
3305     /* sort the masked values */
3306     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3307 
3308     /* set "j" values into matrix */
3309     maskcount = 1;
3310     for (j=0; j<nmask; j++) {
3311       a->j[jcount++]  = masked[j];
3312       mask[masked[j]] = maskcount++;
3313     }
3314     /* set "a" values into matrix */
3315     ishift = bs2*a->i[i];
3316     for (j=0; j<bs; j++) {
3317       kmax = rowlengths[i*bs+j];
3318       for (k=0; k<kmax; k++) {
3319         tmp       = jj[nzcountb]/bs;
3320         block     = mask[tmp] - 1;
3321         point     = jj[nzcountb] - bs*tmp;
3322         idx       = ishift + bs2*block + j + bs*point;
3323         a->a[idx] = (MatScalar)aa[nzcountb++];
3324       }
3325     }
3326     /* zero out the mask elements we set */
3327     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3328   }
3329   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3330 
3331   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3332   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3333   ierr = PetscFree(aa);CHKERRQ(ierr);
3334   ierr = PetscFree(jj);CHKERRQ(ierr);
3335   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3336 
3337   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3338   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3339   PetscFunctionReturn(0);
3340 }
3341 
3342 #undef __FUNCT__
3343 #define __FUNCT__ "MatCreateSeqBAIJ"
3344 /*@C
3345    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3346    compressed row) format.  For good matrix assembly performance the
3347    user should preallocate the matrix storage by setting the parameter nz
3348    (or the array nnz).  By setting these parameters accurately, performance
3349    during matrix assembly can be increased by more than a factor of 50.
3350 
3351    Collective on MPI_Comm
3352 
3353    Input Parameters:
3354 +  comm - MPI communicator, set to PETSC_COMM_SELF
3355 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3356           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3357 .  m - number of rows
3358 .  n - number of columns
3359 .  nz - number of nonzero blocks  per block row (same for all rows)
3360 -  nnz - array containing the number of nonzero blocks in the various block rows
3361          (possibly different for each block row) or NULL
3362 
3363    Output Parameter:
3364 .  A - the matrix
3365 
3366    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3367    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3368    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3369 
3370    Options Database Keys:
3371 .   -mat_no_unroll - uses code that does not unroll the loops in the
3372                      block calculations (much slower)
3373 .    -mat_block_size - size of the blocks to use
3374 
3375    Level: intermediate
3376 
3377    Notes:
3378    The number of rows and columns must be divisible by blocksize.
3379 
3380    If the nnz parameter is given then the nz parameter is ignored
3381 
3382    A nonzero block is any block that as 1 or more nonzeros in it
3383 
3384    The block AIJ format is fully compatible with standard Fortran 77
3385    storage.  That is, the stored row and column indices can begin at
3386    either one (as in Fortran) or zero.  See the users' manual for details.
3387 
3388    Specify the preallocated storage with either nz or nnz (not both).
3389    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3390    allocation.  See Users-Manual: ch_mat for details.
3391    matrices.
3392 
3393 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3394 @*/
3395 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3396 {
3397   PetscErrorCode ierr;
3398 
3399   PetscFunctionBegin;
3400   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3401   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3402   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3403   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3409 /*@C
3410    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3411    per row in the matrix. For good matrix assembly performance the
3412    user should preallocate the matrix storage by setting the parameter nz
3413    (or the array nnz).  By setting these parameters accurately, performance
3414    during matrix assembly can be increased by more than a factor of 50.
3415 
3416    Collective on MPI_Comm
3417 
3418    Input Parameters:
3419 +  B - the matrix
3420 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3421           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3422 .  nz - number of block nonzeros per block row (same for all rows)
3423 -  nnz - array containing the number of block nonzeros in the various block rows
3424          (possibly different for each block row) or NULL
3425 
3426    Options Database Keys:
3427 .   -mat_no_unroll - uses code that does not unroll the loops in the
3428                      block calculations (much slower)
3429 .   -mat_block_size - size of the blocks to use
3430 
3431    Level: intermediate
3432 
3433    Notes:
3434    If the nnz parameter is given then the nz parameter is ignored
3435 
3436    You can call MatGetInfo() to get information on how effective the preallocation was;
3437    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3438    You can also run with the option -info and look for messages with the string
3439    malloc in them to see if additional memory allocation was needed.
3440 
3441    The block AIJ format is fully compatible with standard Fortran 77
3442    storage.  That is, the stored row and column indices can begin at
3443    either one (as in Fortran) or zero.  See the users' manual for details.
3444 
3445    Specify the preallocated storage with either nz or nnz (not both).
3446    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3447    allocation.  See Users-Manual: ch_mat for details.
3448 
3449 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3450 @*/
3451 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3452 {
3453   PetscErrorCode ierr;
3454 
3455   PetscFunctionBegin;
3456   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3457   PetscValidType(B,1);
3458   PetscValidLogicalCollectiveInt(B,bs,2);
3459   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3460   PetscFunctionReturn(0);
3461 }
3462 
3463 #undef __FUNCT__
3464 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3465 /*@C
3466    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3467    (the default sequential PETSc format).
3468 
3469    Collective on MPI_Comm
3470 
3471    Input Parameters:
3472 +  B - the matrix
3473 .  i - the indices into j for the start of each local row (starts with zero)
3474 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3475 -  v - optional values in the matrix
3476 
3477    Level: developer
3478 
3479    Notes:
3480    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3481    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3482    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3483    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3484    block column and the second index is over columns within a block.
3485 
3486 .keywords: matrix, aij, compressed row, sparse
3487 
3488 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3489 @*/
3490 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3491 {
3492   PetscErrorCode ierr;
3493 
3494   PetscFunctionBegin;
3495   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3496   PetscValidType(B,1);
3497   PetscValidLogicalCollectiveInt(B,bs,2);
3498   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3499   PetscFunctionReturn(0);
3500 }
3501 
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3505 /*@
3506      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3507 
3508      Collective on MPI_Comm
3509 
3510    Input Parameters:
3511 +  comm - must be an MPI communicator of size 1
3512 .  bs - size of block
3513 .  m - number of rows
3514 .  n - number of columns
3515 .  i - row indices
3516 .  j - column indices
3517 -  a - matrix values
3518 
3519    Output Parameter:
3520 .  mat - the matrix
3521 
3522    Level: advanced
3523 
3524    Notes:
3525        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3526     once the matrix is destroyed
3527 
3528        You cannot set new nonzero locations into this matrix, that will generate an error.
3529 
3530        The i and j indices are 0 based
3531 
3532        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3533 
3534       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3535       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3536       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3537       with column-major ordering within blocks.
3538 
3539 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3540 
3541 @*/
3542 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3543 {
3544   PetscErrorCode ierr;
3545   PetscInt       ii;
3546   Mat_SeqBAIJ    *baij;
3547 
3548   PetscFunctionBegin;
3549   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3550   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3551 
3552   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3553   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3554   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3555   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3556   baij = (Mat_SeqBAIJ*)(*mat)->data;
3557   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3558   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3559 
3560   baij->i = i;
3561   baij->j = j;
3562   baij->a = a;
3563 
3564   baij->singlemalloc = PETSC_FALSE;
3565   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3566   baij->free_a       = PETSC_FALSE;
3567   baij->free_ij      = PETSC_FALSE;
3568 
3569   for (ii=0; ii<m; ii++) {
3570     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3571 #if defined(PETSC_USE_DEBUG)
3572     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3573 #endif
3574   }
3575 #if defined(PETSC_USE_DEBUG)
3576   for (ii=0; ii<baij->i[m]; ii++) {
3577     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3578     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3579   }
3580 #endif
3581 
3582   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3584   PetscFunctionReturn(0);
3585 }
3586 
3587 #undef __FUNCT__
3588 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3589 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3590 {
3591   PetscErrorCode ierr;
3592 
3593   PetscFunctionBegin;
3594   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3595   PetscFunctionReturn(0);
3596 }
3597