xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ec1892c82279d5cc06e9d5039e7affb2a767a275)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
50         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51       }
52 
53       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
54       diag    += 1;
55       mdiag   += 1;
56     }
57     break;
58   case 2:
59     for (i=0; i<mbs; i++) {
60       odiag    = v + 4*diag_offset[i];
61       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
64       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65       diag    += 4;
66       mdiag   += 4;
67     }
68     break;
69   case 3:
70     for (i=0; i<mbs; i++) {
71       odiag    = v + 9*diag_offset[i];
72       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74       diag[8]  = odiag[8];
75       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77       mdiag[8] = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81       mdiag   += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92       mdiag += 16;
93     }
94     break;
95   case 5:
96     for (i=0; i<mbs; i++) {
97       odiag  = v + 25*diag_offset[i];
98       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
99       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
101       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102       diag  += 25;
103       mdiag += 25;
104     }
105     break;
106   case 6:
107     for (i=0; i<mbs; i++) {
108       odiag  = v + 36*diag_offset[i];
109       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
110       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
111       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
112       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113       diag  += 36;
114       mdiag += 36;
115     }
116     break;
117   case 7:
118     for (i=0; i<mbs; i++) {
119       odiag  = v + 49*diag_offset[i];
120       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
121       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
122       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
123       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124       diag  += 49;
125       mdiag += 49;
126     }
127     break;
128   default:
129     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
130     for (i=0; i<mbs; i++) {
131       odiag  = v + bs2*diag_offset[i];
132       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
133       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
135       diag  += bs2;
136       mdiag += bs2;
137     }
138     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
139   }
140   a->idiagvalid = PETSC_TRUE;
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "MatSOR_SeqBAIJ"
146 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
147 {
148   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
149   PetscScalar       *x,*work,*w,*workt,*t;
150   const MatScalar   *v,*aa = a->a, *idiag;
151   const PetscScalar *b,*xb;
152   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
153   PetscErrorCode    ierr;
154   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
155   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
156 
157   PetscFunctionBegin;
158   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
159   its = its*lits;
160   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
161   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
162   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
163   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
164   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
165 
166   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
167 
168   if (!m) PetscFunctionReturn(0);
169   diag  = a->diag;
170   idiag = a->idiag;
171   k    = PetscMax(A->rmap->n,A->cmap->n);
172   if (!a->mult_work) {
173     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
174   }
175   if (!a->sor_workt) {
176     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
177   }
178   if (!a->sor_work) {
179     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
180   }
181   work = a->mult_work;
182   t    = a->sor_workt;
183   w    = a->sor_work;
184 
185   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
186   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
187 
188   if (flag & SOR_ZERO_INITIAL_GUESS) {
189     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
190       switch (bs) {
191       case 1:
192         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
193         t[0] = b[0];
194         i2     = 1;
195         idiag += 1;
196         for (i=1; i<m; i++) {
197           v  = aa + ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2];
201           for (j=0; j<nz; j++) {
202             xw[0] = x[vi[j]];
203             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
204           }
205           t[i2] = s[0];
206           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
207           x[i2]  = xw[0];
208           idiag += 1;
209           i2    += 1;
210         }
211         break;
212       case 2:
213         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
214         t[0] = b[0]; t[1] = b[1];
215         i2     = 2;
216         idiag += 4;
217         for (i=1; i<m; i++) {
218           v  = aa + 4*ai[i];
219           vi = aj + ai[i];
220           nz = diag[i] - ai[i];
221           s[0] = b[i2]; s[1] = b[i2+1];
222           for (j=0; j<nz; j++) {
223             idx = 2*vi[j];
224             it  = 4*j;
225             xw[0] = x[idx]; xw[1] = x[1+idx];
226             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
227           }
228           t[i2] = s[0]; t[i2+1] = s[1];
229           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
230           x[i2]   = xw[0]; x[i2+1] = xw[1];
231           idiag  += 4;
232           i2     += 2;
233         }
234         break;
235       case 3:
236         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
237         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
238         i2     = 3;
239         idiag += 9;
240         for (i=1; i<m; i++) {
241           v  = aa + 9*ai[i];
242           vi = aj + ai[i];
243           nz = diag[i] - ai[i];
244           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
245           while (nz--) {
246             idx = 3*(*vi++);
247             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
248             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
249             v  += 9;
250           }
251           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
252           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
253           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
254           idiag  += 9;
255           i2     += 3;
256         }
257         break;
258       case 4:
259         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
260         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
261         i2     = 4;
262         idiag += 16;
263         for (i=1; i<m; i++) {
264           v  = aa + 16*ai[i];
265           vi = aj + ai[i];
266           nz = diag[i] - ai[i];
267           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
268           while (nz--) {
269             idx = 4*(*vi++);
270             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
271             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
272             v  += 16;
273           }
274           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
275           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
276           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
277           idiag  += 16;
278           i2     += 4;
279         }
280         break;
281       case 5:
282         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
283         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
284         i2     = 5;
285         idiag += 25;
286         for (i=1; i<m; i++) {
287           v  = aa + 25*ai[i];
288           vi = aj + ai[i];
289           nz = diag[i] - ai[i];
290           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
291           while (nz--) {
292             idx = 5*(*vi++);
293             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
294             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
295             v  += 25;
296           }
297           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
298           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
299           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
300           idiag  += 25;
301           i2     += 5;
302         }
303         break;
304       case 6:
305         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
306         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
307         i2     = 6;
308         idiag += 36;
309         for (i=1; i<m; i++) {
310           v  = aa + 36*ai[i];
311           vi = aj + ai[i];
312           nz = diag[i] - ai[i];
313           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
314           while (nz--) {
315             idx = 6*(*vi++);
316             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
317             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
318             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
319             v  += 36;
320           }
321           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
322           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
323           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
324           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
325           idiag  += 36;
326           i2     += 6;
327         }
328         break;
329       case 7:
330         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
331         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
332         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
333         i2     = 7;
334         idiag += 49;
335         for (i=1; i<m; i++) {
336           v  = aa + 49*ai[i];
337           vi = aj + ai[i];
338           nz = diag[i] - ai[i];
339           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
340           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
341           while (nz--) {
342             idx = 7*(*vi++);
343             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
344             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
345             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
346             v  += 49;
347           }
348           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
349           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
350           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
351           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
352           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
353           idiag  += 49;
354           i2     += 7;
355         }
356         break;
357       default:
358         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
359         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
360         i2     = bs;
361         idiag += bs2;
362         for (i=1; i<m; i++) {
363           v  = aa + bs2*ai[i];
364           vi = aj + ai[i];
365           nz = diag[i] - ai[i];
366 
367           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
368           /* copy all rows of x that are needed into contiguous space */
369           workt = work;
370           for (j=0; j<nz; j++) {
371             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
372             workt += bs;
373           }
374           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
375           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
376           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
377 
378           idiag += bs2;
379           i2    += bs;
380         }
381         break;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
385       xb = t;
386     }
387     else xb = b;
388     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       idiag = a->idiag+bs2*(a->mbs-1);
390       i2 = bs * (m-1);
391       switch (bs) {
392       case 1:
393         s[0]  = xb[i2];
394         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395         x[i2] = xw[0];
396         i2   -= 1;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + (diag[i]+1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2];
402           for (j=0; j<nz; j++) {
403             xw[0] = x[vi[j]];
404             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
405           }
406           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
407           x[i2]  = xw[0];
408           idiag -= 1;
409           i2    -= 1;
410         }
411         break;
412       case 2:
413         s[0]  = xb[i2]; s[1] = xb[i2+1];
414         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
415         x[i2] = xw[0]; x[i2+1] = xw[1];
416         i2    -= 2;
417         idiag -= 4;
418         for (i=m-2; i>=0; i--) {
419           v  = aa + 4*(diag[i] + 1);
420           vi = aj + diag[i] + 1;
421           nz = ai[i+1] - diag[i] - 1;
422           s[0] = xb[i2]; s[1] = xb[i2+1];
423           for (j=0; j<nz; j++) {
424             idx = 2*vi[j];
425             it  = 4*j;
426             xw[0] = x[idx]; xw[1] = x[1+idx];
427             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
428           }
429           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
430           x[i2]   = xw[0]; x[i2+1] = xw[1];
431           idiag  -= 4;
432           i2     -= 2;
433         }
434         break;
435       case 3:
436         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
437         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
438         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
439         i2    -= 3;
440         idiag -= 9;
441         for (i=m-2; i>=0; i--) {
442           v  = aa + 9*(diag[i]+1);
443           vi = aj + diag[i] + 1;
444           nz = ai[i+1] - diag[i] - 1;
445           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
446           while (nz--) {
447             idx = 3*(*vi++);
448             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
449             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
450             v  += 9;
451           }
452           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
453           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
454           idiag  -= 9;
455           i2     -= 3;
456         }
457         break;
458       case 4:
459         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
460         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
461         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
462         i2    -= 4;
463         idiag -= 16;
464         for (i=m-2; i>=0; i--) {
465           v  = aa + 16*(diag[i]+1);
466           vi = aj + diag[i] + 1;
467           nz = ai[i+1] - diag[i] - 1;
468           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
469           while (nz--) {
470             idx = 4*(*vi++);
471             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
472             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
473             v  += 16;
474           }
475           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
476           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
477           idiag  -= 16;
478           i2     -= 4;
479         }
480         break;
481       case 5:
482         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
483         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
484         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
485         i2    -= 5;
486         idiag -= 25;
487         for (i=m-2; i>=0; i--) {
488           v  = aa + 25*(diag[i]+1);
489           vi = aj + diag[i] + 1;
490           nz = ai[i+1] - diag[i] - 1;
491           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
492           while (nz--) {
493             idx = 5*(*vi++);
494             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
495             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
496             v  += 25;
497           }
498           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
499           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
500           idiag  -= 25;
501           i2     -= 5;
502         }
503         break;
504       case 6:
505         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
506         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
507         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
508         i2    -= 6;
509         idiag -= 36;
510         for (i=m-2; i>=0; i--) {
511           v  = aa + 36*(diag[i]+1);
512           vi = aj + diag[i] + 1;
513           nz = ai[i+1] - diag[i] - 1;
514           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
515           while (nz--) {
516             idx = 6*(*vi++);
517             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
518             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
519             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
520             v  += 36;
521           }
522           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
523           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
524           idiag  -= 36;
525           i2     -= 6;
526         }
527         break;
528       case 7:
529         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
532         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534         i2    -= 7;
535         idiag -= 49;
536         for (i=m-2; i>=0; i--) {
537           v  = aa + 49*(diag[i]+1);
538           vi = aj + diag[i] + 1;
539           nz = ai[i+1] - diag[i] - 1;
540           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
541           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
542           while (nz--) {
543             idx = 7*(*vi++);
544             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
545             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
546             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
547             v  += 49;
548           }
549           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
550           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
551           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
552           idiag  -= 49;
553           i2     -= 7;
554         }
555         break;
556       default:
557         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
558         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
559         i2    -= bs;
560         idiag -= bs2;
561         for (i=m-2; i>=0; i--) {
562           v  = aa + bs2*(diag[i]+1);
563           vi = aj + diag[i] + 1;
564           nz = ai[i+1] - diag[i] - 1;
565 
566           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
567           /* copy all rows of x that are needed into contiguous space */
568           workt = work;
569           for (j=0; j<nz; j++) {
570             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
571             workt += bs;
572           }
573           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
574           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
575 
576           idiag -= bs2;
577           i2    -= bs;
578         }
579         break;
580       }
581       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
582     }
583     its--;
584   }
585   while (its--) {
586     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
587       idiag = a->idiag;
588       i2 = 0;
589       switch (bs) {
590       case 1:
591         for (i=0; i<m; i++) {
592           v  = aa + ai[i];
593           vi = aj + ai[i];
594           nz = ai[i+1] - ai[i];
595           s[0] = b[i2];
596           for (j=0; j<nz; j++) {
597             xw[0] = x[vi[j]];
598             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
599           }
600           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
601           x[i2] += xw[0];
602           idiag += 1;
603           i2    += 1;
604         }
605         break;
606       case 2:
607         for (i=0; i<m; i++) {
608           v  = aa + 4*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1];
612           for (j=0; j<nz; j++) {
613             idx = 2*vi[j];
614             it  = 4*j;
615             xw[0] = x[idx]; xw[1] = x[1+idx];
616             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
617           }
618           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
619           x[i2]  += xw[0]; x[i2+1] += xw[1];
620           idiag  += 4;
621           i2     += 2;
622         }
623         break;
624       case 3:
625         for (i=0; i<m; i++) {
626           v  = aa + 9*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
630           while (nz--) {
631             idx = 3*(*vi++);
632             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
634             v  += 9;
635           }
636           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
638           idiag  += 9;
639           i2     += 3;
640         }
641         break;
642       case 4:
643         for (i=0; i<m; i++) {
644           v  = aa + 16*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
648           while (nz--) {
649             idx = 4*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
652             v  += 16;
653           }
654           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
656           idiag  += 16;
657           i2     += 4;
658         }
659         break;
660       case 5:
661         for (i=0; i<m; i++) {
662           v  = aa + 25*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
666           while (nz--) {
667             idx = 5*(*vi++);
668             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
669             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
670             v  += 25;
671           }
672           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
673           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
674           idiag  += 25;
675           i2     += 5;
676         }
677         break;
678       case 6:
679         for (i=0; i<m; i++) {
680           v  = aa + 36*ai[i];
681           vi = aj + ai[i];
682           nz = ai[i+1] - ai[i];
683           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
684           while (nz--) {
685             idx = 6*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
689             v  += 36;
690           }
691           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
692           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
694           idiag  += 36;
695           i2     += 6;
696         }
697         break;
698       case 7:
699         for (i=0; i<m; i++) {
700           v  = aa + 49*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
704           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
705           while (nz--) {
706             idx = 7*(*vi++);
707             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
708             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
709             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
710             v  += 49;
711           }
712           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
713           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
714           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
715           idiag  += 49;
716           i2     += 7;
717         }
718         break;
719       default:
720         for (i=0; i<m; i++) {
721           v  = aa + bs2*ai[i];
722           vi = aj + ai[i];
723           nz = ai[i+1] - ai[i];
724 
725           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
726           /* copy all rows of x that are needed into contiguous space */
727           workt = work;
728           for (j=0; j<nz; j++) {
729             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
730             workt += bs;
731           }
732           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
733           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
734 
735           idiag += bs2;
736           i2    += bs;
737         }
738         break;
739       }
740       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
741     }
742     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
743       idiag = a->idiag+bs2*(a->mbs-1);
744       i2 = bs * (m-1);
745       switch (bs) {
746       case 1:
747         for (i=m-1; i>=0; i--) {
748           v  = aa + ai[i];
749           vi = aj + ai[i];
750           nz = ai[i+1] - ai[i];
751           s[0] = b[i2];
752           for (j=0; j<nz; j++) {
753             xw[0] = x[vi[j]];
754             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
755           }
756           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
757           x[i2] += xw[0];
758           idiag -= 1;
759           i2    -= 1;
760         }
761         break;
762       case 2:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 4*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1];
768           for (j=0; j<nz; j++) {
769             idx = 2*vi[j];
770             it  = 4*j;
771             xw[0] = x[idx]; xw[1] = x[1+idx];
772             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
773           }
774           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
775           x[i2]  += xw[0]; x[i2+1] += xw[1];
776           idiag  -= 4;
777           i2     -= 2;
778         }
779         break;
780       case 3:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 9*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
786           while (nz--) {
787             idx = 3*(*vi++);
788             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
790             v  += 9;
791           }
792           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
794           idiag  -= 9;
795           i2     -= 3;
796         }
797         break;
798       case 4:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 16*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
804           while (nz--) {
805             idx = 4*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
808             v  += 16;
809           }
810           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
812           idiag  -= 16;
813           i2     -= 4;
814         }
815         break;
816       case 5:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 25*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
822           while (nz--) {
823             idx = 5*(*vi++);
824             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
825             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
826             v  += 25;
827           }
828           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
829           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
830           idiag  -= 25;
831           i2     -= 5;
832         }
833         break;
834       case 6:
835         for (i=m-1; i>=0; i--) {
836           v  = aa + 36*ai[i];
837           vi = aj + ai[i];
838           nz = ai[i+1] - ai[i];
839           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
840           while (nz--) {
841             idx = 6*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
845             v  += 36;
846           }
847           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
848           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
850           idiag  -= 36;
851           i2     -= 6;
852         }
853         break;
854       case 7:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + 49*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
860           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
861           while (nz--) {
862             idx = 7*(*vi++);
863             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
864             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
865             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
866             v  += 49;
867           }
868           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
869           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
870           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
871           idiag  -= 49;
872           i2     -= 7;
873         }
874         break;
875       default:
876         for (i=m-1; i>=0; i--) {
877           v  = aa + bs2*ai[i];
878           vi = aj + ai[i];
879           nz = ai[i+1] - ai[i];
880 
881           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
882           /* copy all rows of x that are needed into contiguous space */
883           workt = work;
884           for (j=0; j<nz; j++) {
885             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
886             workt += bs;
887           }
888           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
889           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
890 
891           idiag -= bs2;
892           i2    -= bs;
893         }
894         break;
895       }
896       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
897     }
898   }
899   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
900   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 
905 /*
906     Special version for direct calls from Fortran (Used in PETSc-fun3d)
907 */
908 #if defined(PETSC_HAVE_FORTRAN_CAPS)
909 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
910 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
911 #define matsetvaluesblocked4_ matsetvaluesblocked4
912 #endif
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "matsetvaluesblocked4_"
916 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
917 {
918   Mat               A  = *AA;
919   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
920   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
921   PetscInt          *ai    =a->i,*ailen=a->ilen;
922   PetscInt          *aj    =a->j,stepval,lastcol = -1;
923   const PetscScalar *value = v;
924   MatScalar         *ap,*aa = a->a,*bap;
925 
926   PetscFunctionBegin;
927   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
928   stepval = (n-1)*4;
929   for (k=0; k<m; k++) { /* loop over added rows */
930     row  = im[k];
931     rp   = aj + ai[row];
932     ap   = aa + 16*ai[row];
933     nrow = ailen[row];
934     low  = 0;
935     high = nrow;
936     for (l=0; l<n; l++) { /* loop over added columns */
937       col = in[l];
938       if (col <= lastcol)  low = 0;
939       else                high = nrow;
940       lastcol = col;
941       value   = v + k*(stepval+4 + l)*4;
942       while (high-low > 7) {
943         t = (low+high)/2;
944         if (rp[t] > col) high = t;
945         else             low  = t;
946       }
947       for (i=low; i<high; i++) {
948         if (rp[i] > col) break;
949         if (rp[i] == col) {
950           bap = ap +  16*i;
951           for (ii=0; ii<4; ii++,value+=stepval) {
952             for (jj=ii; jj<16; jj+=4) {
953               bap[jj] += *value++;
954             }
955           }
956           goto noinsert2;
957         }
958       }
959       N = nrow++ - 1;
960       high++; /* added new column index thus must search to one higher than before */
961       /* shift up all the later entries in this row */
962       for (ii=N; ii>=i; ii--) {
963         rp[ii+1] = rp[ii];
964         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
965       }
966       if (N >= i) {
967         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
968       }
969       rp[i] = col;
970       bap   = ap +  16*i;
971       for (ii=0; ii<4; ii++,value+=stepval) {
972         for (jj=ii; jj<16; jj+=4) {
973           bap[jj] = *value++;
974         }
975       }
976       noinsert2:;
977       low = i;
978     }
979     ailen[row] = nrow;
980   }
981   PetscFunctionReturnVoid();
982 }
983 
984 #if defined(PETSC_HAVE_FORTRAN_CAPS)
985 #define matsetvalues4_ MATSETVALUES4
986 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
987 #define matsetvalues4_ matsetvalues4
988 #endif
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatSetValues4_"
992 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
993 {
994   Mat         A  = *AA;
995   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
996   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
997   PetscInt    *ai=a->i,*ailen=a->ilen;
998   PetscInt    *aj=a->j,brow,bcol;
999   PetscInt    ridx,cidx,lastcol = -1;
1000   MatScalar   *ap,value,*aa=a->a,*bap;
1001 
1002   PetscFunctionBegin;
1003   for (k=0; k<m; k++) { /* loop over added rows */
1004     row  = im[k]; brow = row/4;
1005     rp   = aj + ai[brow];
1006     ap   = aa + 16*ai[brow];
1007     nrow = ailen[brow];
1008     low  = 0;
1009     high = nrow;
1010     for (l=0; l<n; l++) { /* loop over added columns */
1011       col   = in[l]; bcol = col/4;
1012       ridx  = row % 4; cidx = col % 4;
1013       value = v[l + k*n];
1014       if (col <= lastcol)  low = 0;
1015       else                high = nrow;
1016       lastcol = col;
1017       while (high-low > 7) {
1018         t = (low+high)/2;
1019         if (rp[t] > bcol) high = t;
1020         else              low  = t;
1021       }
1022       for (i=low; i<high; i++) {
1023         if (rp[i] > bcol) break;
1024         if (rp[i] == bcol) {
1025           bap   = ap +  16*i + 4*cidx + ridx;
1026           *bap += value;
1027           goto noinsert1;
1028         }
1029       }
1030       N = nrow++ - 1;
1031       high++; /* added new column thus must search to one higher than before */
1032       /* shift up all the later entries in this row */
1033       for (ii=N; ii>=i; ii--) {
1034         rp[ii+1] = rp[ii];
1035         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1036       }
1037       if (N>=i) {
1038         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1039       }
1040       rp[i]                    = bcol;
1041       ap[16*i + 4*cidx + ridx] = value;
1042 noinsert1:;
1043       low = i;
1044     }
1045     ailen[brow] = nrow;
1046   }
1047   PetscFunctionReturnVoid();
1048 }
1049 
1050 /*
1051      Checks for missing diagonals
1052 */
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1055 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1056 {
1057   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1058   PetscErrorCode ierr;
1059   PetscInt       *diag,*ii = a->i,i;
1060 
1061   PetscFunctionBegin;
1062   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1063   *missing = PETSC_FALSE;
1064   if (A->rmap->n > 0 && !ii) {
1065     *missing = PETSC_TRUE;
1066     if (d) *d = 0;
1067     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1068   } else {
1069     diag = a->diag;
1070     for (i=0; i<a->mbs; i++) {
1071       if (diag[i] >= ii[i+1]) {
1072         *missing = PETSC_TRUE;
1073         if (d) *d = i;
1074         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1075         break;
1076       }
1077     }
1078   }
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1084 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1085 {
1086   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1087   PetscErrorCode ierr;
1088   PetscInt       i,j,m = a->mbs;
1089 
1090   PetscFunctionBegin;
1091   if (!a->diag) {
1092     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1093     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1094     a->free_diag = PETSC_TRUE;
1095   }
1096   for (i=0; i<m; i++) {
1097     a->diag[i] = a->i[i+1];
1098     for (j=a->i[i]; j<a->i[i+1]; j++) {
1099       if (a->j[j] == i) {
1100         a->diag[i] = j;
1101         break;
1102       }
1103     }
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1111 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1112 {
1113   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1116   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1117 
1118   PetscFunctionBegin;
1119   *nn = n;
1120   if (!ia) PetscFunctionReturn(0);
1121   if (symmetric) {
1122     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1123     nz   = tia[n];
1124   } else {
1125     tia = a->i; tja = a->j;
1126   }
1127 
1128   if (!blockcompressed && bs > 1) {
1129     (*nn) *= bs;
1130     /* malloc & create the natural set of indices */
1131     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1132     if (n) {
1133       (*ia)[0] = 0;
1134       for (j=1; j<bs; j++) {
1135         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1136       }
1137     }
1138 
1139     for (i=1; i<n; i++) {
1140       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1141       for (j=1; j<bs; j++) {
1142         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1143       }
1144     }
1145     if (n) {
1146       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1147     }
1148 
1149     if (inja) {
1150       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1151       cnt = 0;
1152       for (i=0; i<n; i++) {
1153         for (j=0; j<bs; j++) {
1154           for (k=tia[i]; k<tia[i+1]; k++) {
1155             for (l=0; l<bs; l++) {
1156               (*ja)[cnt++] = bs*tja[k] + l;
1157             }
1158           }
1159         }
1160       }
1161     }
1162 
1163     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1164       ierr = PetscFree(tia);CHKERRQ(ierr);
1165       ierr = PetscFree(tja);CHKERRQ(ierr);
1166     }
1167   } else if (oshift == 1) {
1168     if (symmetric) {
1169       nz = tia[A->rmap->n/bs];
1170       /*  add 1 to i and j indices */
1171       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1172       *ia = tia;
1173       if (ja) {
1174         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1175         *ja = tja;
1176       }
1177     } else {
1178       nz = a->i[A->rmap->n/bs];
1179       /* malloc space and  add 1 to i and j indices */
1180       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1181       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1182       if (ja) {
1183         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1184         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1185       }
1186     }
1187   } else {
1188     *ia = tia;
1189     if (ja) *ja = tja;
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1196 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1197 {
1198   PetscErrorCode ierr;
1199 
1200   PetscFunctionBegin;
1201   if (!ia) PetscFunctionReturn(0);
1202   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1203     ierr = PetscFree(*ia);CHKERRQ(ierr);
1204     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1211 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1212 {
1213   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217 #if defined(PETSC_USE_LOG)
1218   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1219 #endif
1220   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1221   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1222   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1223   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1224   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1225   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1226   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1227   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1228   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1229   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1230   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1231   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1232   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1233 
1234   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1235   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1236   ierr = PetscFree(A->data);CHKERRQ(ierr);
1237 
1238   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1254 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1255 {
1256   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   switch (op) {
1261   case MAT_ROW_ORIENTED:
1262     a->roworiented = flg;
1263     break;
1264   case MAT_KEEP_NONZERO_PATTERN:
1265     a->keepnonzeropattern = flg;
1266     break;
1267   case MAT_NEW_NONZERO_LOCATIONS:
1268     a->nonew = (flg ? 0 : 1);
1269     break;
1270   case MAT_NEW_NONZERO_LOCATION_ERR:
1271     a->nonew = (flg ? -1 : 0);
1272     break;
1273   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1274     a->nonew = (flg ? -2 : 0);
1275     break;
1276   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1277     a->nounused = (flg ? -1 : 0);
1278     break;
1279   case MAT_NEW_DIAGONALS:
1280   case MAT_IGNORE_OFF_PROC_ENTRIES:
1281   case MAT_USE_HASH_TABLE:
1282     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1283     break;
1284   case MAT_SPD:
1285   case MAT_SYMMETRIC:
1286   case MAT_STRUCTURALLY_SYMMETRIC:
1287   case MAT_HERMITIAN:
1288   case MAT_SYMMETRY_ETERNAL:
1289     /* These options are handled directly by MatSetOption() */
1290     break;
1291   default:
1292     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1300 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1301 {
1302   PetscErrorCode ierr;
1303   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1304   MatScalar      *aa_i;
1305   PetscScalar    *v_i;
1306 
1307   PetscFunctionBegin;
1308   bs  = A->rmap->bs;
1309   bs2 = bs*bs;
1310   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1311 
1312   bn  = row/bs;   /* Block number */
1313   bp  = row % bs; /* Block Position */
1314   M   = ai[bn+1] - ai[bn];
1315   *nz = bs*M;
1316 
1317   if (v) {
1318     *v = 0;
1319     if (*nz) {
1320       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1321       for (i=0; i<M; i++) { /* for each block in the block row */
1322         v_i  = *v + i*bs;
1323         aa_i = aa + bs2*(ai[bn] + i);
1324         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1325       }
1326     }
1327   }
1328 
1329   if (idx) {
1330     *idx = 0;
1331     if (*nz) {
1332       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1333       for (i=0; i<M; i++) { /* for each block in the block row */
1334         idx_i = *idx + i*bs;
1335         itmp  = bs*aj[ai[bn] + i];
1336         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1337       }
1338     }
1339   }
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1345 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1346 {
1347   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1357 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358 {
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1363   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1371 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1372 {
1373   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1374   Mat            C;
1375   PetscErrorCode ierr;
1376   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1377   PetscInt       *rows,*cols,bs2=a->bs2;
1378   MatScalar      *array;
1379 
1380   PetscFunctionBegin;
1381   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1382   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1383     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1384 
1385     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1386     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1387     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1388     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1389     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1390     ierr = PetscFree(col);CHKERRQ(ierr);
1391   } else {
1392     C = *B;
1393   }
1394 
1395   array = a->a;
1396   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1397   for (i=0; i<mbs; i++) {
1398     cols[0] = i*bs;
1399     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1400     len = ai[i+1] - ai[i];
1401     for (j=0; j<len; j++) {
1402       rows[0] = (*aj++)*bs;
1403       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1404       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1405       array += bs2;
1406     }
1407   }
1408   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1409 
1410   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412 
1413   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1414     *B = C;
1415   } else {
1416     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1423 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1424 {
1425   PetscErrorCode ierr;
1426   Mat            Btrans;
1427 
1428   PetscFunctionBegin;
1429   *f   = PETSC_FALSE;
1430   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1431   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1432   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1438 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1439 {
1440   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1441   PetscErrorCode ierr;
1442   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1443   int            fd;
1444   PetscScalar    *aa;
1445   FILE           *file;
1446 
1447   PetscFunctionBegin;
1448   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1449   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1450   col_lens[0] = MAT_FILE_CLASSID;
1451 
1452   col_lens[1] = A->rmap->N;
1453   col_lens[2] = A->cmap->n;
1454   col_lens[3] = a->nz*bs2;
1455 
1456   /* store lengths of each row and write (including header) to file */
1457   count = 0;
1458   for (i=0; i<a->mbs; i++) {
1459     for (j=0; j<bs; j++) {
1460       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1461     }
1462   }
1463   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1464   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1465 
1466   /* store column indices (zero start index) */
1467   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1468   count = 0;
1469   for (i=0; i<a->mbs; i++) {
1470     for (j=0; j<bs; j++) {
1471       for (k=a->i[i]; k<a->i[i+1]; k++) {
1472         for (l=0; l<bs; l++) {
1473           jj[count++] = bs*a->j[k] + l;
1474         }
1475       }
1476     }
1477   }
1478   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1479   ierr = PetscFree(jj);CHKERRQ(ierr);
1480 
1481   /* store nonzero values */
1482   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1483   count = 0;
1484   for (i=0; i<a->mbs; i++) {
1485     for (j=0; j<bs; j++) {
1486       for (k=a->i[i]; k<a->i[i+1]; k++) {
1487         for (l=0; l<bs; l++) {
1488           aa[count++] = a->a[bs2*k + l*bs + j];
1489         }
1490       }
1491     }
1492   }
1493   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1494   ierr = PetscFree(aa);CHKERRQ(ierr);
1495 
1496   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1497   if (file) {
1498     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1505 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1506 {
1507   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1508   PetscErrorCode    ierr;
1509   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1510   PetscViewerFormat format;
1511 
1512   PetscFunctionBegin;
1513   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1514   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1515     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1516   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1517     const char *matname;
1518     Mat        aij;
1519     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1520     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1521     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1522     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1523     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1524   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525       PetscFunctionReturn(0);
1526   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1528     for (i=0; i<a->mbs; i++) {
1529       for (j=0; j<bs; j++) {
1530         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1531         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532           for (l=0; l<bs; l++) {
1533 #if defined(PETSC_USE_COMPLEX)
1534             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1536                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             }
1543 #else
1544             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1546             }
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1554   } else {
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1556     for (i=0; i<a->mbs; i++) {
1557       for (j=0; j<bs; j++) {
1558         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1559         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560           for (l=0; l<bs; l++) {
1561 #if defined(PETSC_USE_COMPLEX)
1562             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1564                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             }
1571 #else
1572             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1580   }
1581   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #include <petscdraw.h>
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1588 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1589 {
1590   Mat               A = (Mat) Aa;
1591   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1592   PetscErrorCode    ierr;
1593   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1594   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1595   MatScalar         *aa;
1596   PetscViewer       viewer;
1597   PetscViewerFormat format;
1598 
1599   PetscFunctionBegin;
1600   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1601   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1602 
1603   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1604 
1605   /* loop over matrix elements drawing boxes */
1606 
1607   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608     color = PETSC_DRAW_BLUE;
1609     for (i=0,row=0; i<mbs; i++,row+=bs) {
1610       for (j=a->i[i]; j<a->i[i+1]; j++) {
1611         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1612         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1613         aa  = a->a + j*bs2;
1614         for (k=0; k<bs; k++) {
1615           for (l=0; l<bs; l++) {
1616             if (PetscRealPart(*aa++) >=  0.) continue;
1617             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1618           }
1619         }
1620       }
1621     }
1622     color = PETSC_DRAW_CYAN;
1623     for (i=0,row=0; i<mbs; i++,row+=bs) {
1624       for (j=a->i[i]; j<a->i[i+1]; j++) {
1625         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1626         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1627         aa  = a->a + j*bs2;
1628         for (k=0; k<bs; k++) {
1629           for (l=0; l<bs; l++) {
1630             if (PetscRealPart(*aa++) != 0.) continue;
1631             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1632           }
1633         }
1634       }
1635     }
1636     color = PETSC_DRAW_RED;
1637     for (i=0,row=0; i<mbs; i++,row+=bs) {
1638       for (j=a->i[i]; j<a->i[i+1]; j++) {
1639         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1640         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1641         aa  = a->a + j*bs2;
1642         for (k=0; k<bs; k++) {
1643           for (l=0; l<bs; l++) {
1644             if (PetscRealPart(*aa++) <= 0.) continue;
1645             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1646           }
1647         }
1648       }
1649     }
1650   } else {
1651     /* use contour shading to indicate magnitude of values */
1652     /* first determine max of all nonzero values */
1653     PetscDraw popup;
1654     PetscReal scale,maxv = 0.0;
1655 
1656     for (i=0; i<a->nz*a->bs2; i++) {
1657       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1658     }
1659     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1660     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1661     if (popup) {
1662       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1663     }
1664     for (i=0,row=0; i<mbs; i++,row+=bs) {
1665       for (j=a->i[i]; j<a->i[i+1]; j++) {
1666         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1667         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1668         aa  = a->a + j*bs2;
1669         for (k=0; k<bs; k++) {
1670           for (l=0; l<bs; l++) {
1671             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1672             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1673           }
1674         }
1675       }
1676     }
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1684 {
1685   PetscErrorCode ierr;
1686   PetscReal      xl,yl,xr,yr,w,h;
1687   PetscDraw      draw;
1688   PetscBool      isnull;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1692   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1693 
1694   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1695   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1696   xr  += w;    yr += h;  xl = -w;     yl = -h;
1697   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1698   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatView_SeqBAIJ"
1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1706 {
1707   PetscErrorCode ierr;
1708   PetscBool      iascii,isbinary,isdraw;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1714   if (iascii) {
1715     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1716   } else if (isbinary) {
1717     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1718   } else if (isdraw) {
1719     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1720   } else {
1721     Mat B;
1722     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1723     ierr = MatView(B,viewer);CHKERRQ(ierr);
1724     ierr = MatDestroy(&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1732 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1733 {
1734   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1735   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1736   PetscInt    *ai = a->i,*ailen = a->ilen;
1737   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1738   MatScalar   *ap,*aa = a->a;
1739 
1740   PetscFunctionBegin;
1741   for (k=0; k<m; k++) { /* loop over rows */
1742     row = im[k]; brow = row/bs;
1743     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1744     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1745     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1746     nrow = ailen[brow];
1747     for (l=0; l<n; l++) { /* loop over columns */
1748       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1749       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1750       col  = in[l];
1751       bcol = col/bs;
1752       cidx = col%bs;
1753       ridx = row%bs;
1754       high = nrow;
1755       low  = 0; /* assume unsorted */
1756       while (high-low > 5) {
1757         t = (low+high)/2;
1758         if (rp[t] > bcol) high = t;
1759         else             low  = t;
1760       }
1761       for (i=low; i<high; i++) {
1762         if (rp[i] > bcol) break;
1763         if (rp[i] == bcol) {
1764           *v++ = ap[bs2*i+bs*cidx+ridx];
1765           goto finished;
1766         }
1767       }
1768       *v++ = 0.0;
1769 finished:;
1770     }
1771   }
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1777 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1778 {
1779   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1780   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1781   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1782   PetscErrorCode    ierr;
1783   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1784   PetscBool         roworiented=a->roworiented;
1785   const PetscScalar *value     = v;
1786   MatScalar         *ap,*aa = a->a,*bap;
1787 
1788   PetscFunctionBegin;
1789   if (roworiented) {
1790     stepval = (n-1)*bs;
1791   } else {
1792     stepval = (m-1)*bs;
1793   }
1794   for (k=0; k<m; k++) { /* loop over added rows */
1795     row = im[k];
1796     if (row < 0) continue;
1797 #if defined(PETSC_USE_DEBUG)
1798     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1799 #endif
1800     rp   = aj + ai[row];
1801     ap   = aa + bs2*ai[row];
1802     rmax = imax[row];
1803     nrow = ailen[row];
1804     low  = 0;
1805     high = nrow;
1806     for (l=0; l<n; l++) { /* loop over added columns */
1807       if (in[l] < 0) continue;
1808 #if defined(PETSC_USE_DEBUG)
1809       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1810 #endif
1811       col = in[l];
1812       if (roworiented) {
1813         value = v + (k*(stepval+bs) + l)*bs;
1814       } else {
1815         value = v + (l*(stepval+bs) + k)*bs;
1816       }
1817       if (col <= lastcol) low = 0;
1818       else high = nrow;
1819       lastcol = col;
1820       while (high-low > 7) {
1821         t = (low+high)/2;
1822         if (rp[t] > col) high = t;
1823         else             low  = t;
1824       }
1825       for (i=low; i<high; i++) {
1826         if (rp[i] > col) break;
1827         if (rp[i] == col) {
1828           bap = ap +  bs2*i;
1829           if (roworiented) {
1830             if (is == ADD_VALUES) {
1831               for (ii=0; ii<bs; ii++,value+=stepval) {
1832                 for (jj=ii; jj<bs2; jj+=bs) {
1833                   bap[jj] += *value++;
1834                 }
1835               }
1836             } else {
1837               for (ii=0; ii<bs; ii++,value+=stepval) {
1838                 for (jj=ii; jj<bs2; jj+=bs) {
1839                   bap[jj] = *value++;
1840                 }
1841               }
1842             }
1843           } else {
1844             if (is == ADD_VALUES) {
1845               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1846                 for (jj=0; jj<bs; jj++) {
1847                   bap[jj] += value[jj];
1848                 }
1849                 bap += bs;
1850               }
1851             } else {
1852               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1853                 for (jj=0; jj<bs; jj++) {
1854                   bap[jj]  = value[jj];
1855                 }
1856                 bap += bs;
1857               }
1858             }
1859           }
1860           goto noinsert2;
1861         }
1862       }
1863       if (nonew == 1) goto noinsert2;
1864       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1865       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1866       N = nrow++ - 1; high++;
1867       /* shift up all the later entries in this row */
1868       for (ii=N; ii>=i; ii--) {
1869         rp[ii+1] = rp[ii];
1870         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1871       }
1872       if (N >= i) {
1873         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1874       }
1875       rp[i] = col;
1876       bap   = ap +  bs2*i;
1877       if (roworiented) {
1878         for (ii=0; ii<bs; ii++,value+=stepval) {
1879           for (jj=ii; jj<bs2; jj+=bs) {
1880             bap[jj] = *value++;
1881           }
1882         }
1883       } else {
1884         for (ii=0; ii<bs; ii++,value+=stepval) {
1885           for (jj=0; jj<bs; jj++) {
1886             *bap++ = *value++;
1887           }
1888         }
1889       }
1890 noinsert2:;
1891       low = i;
1892     }
1893     ailen[row] = nrow;
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1900 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1901 {
1902   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1903   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1904   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1905   PetscErrorCode ierr;
1906   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1907   MatScalar      *aa  = a->a,*ap;
1908   PetscReal      ratio=0.6;
1909 
1910   PetscFunctionBegin;
1911   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1912 
1913   if (m) rmax = ailen[0];
1914   for (i=1; i<mbs; i++) {
1915     /* move each row back by the amount of empty slots (fshift) before it*/
1916     fshift += imax[i-1] - ailen[i-1];
1917     rmax    = PetscMax(rmax,ailen[i]);
1918     if (fshift) {
1919       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1920       N  = ailen[i];
1921       for (j=0; j<N; j++) {
1922         ip[j-fshift] = ip[j];
1923 
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933 
1934   /* reset ilen and imax for each row */
1935   a->nonzerorowcnt = 0;
1936   for (i=0; i<mbs; i++) {
1937     ailen[i] = imax[i] = ai[i+1] - ai[i];
1938     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1939   }
1940   a->nz = ai[mbs];
1941 
1942   /* diagonals may have moved, so kill the diagonal pointers */
1943   a->idiagvalid = PETSC_FALSE;
1944   if (fshift && a->diag) {
1945     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1946     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1947     a->diag = 0;
1948   }
1949   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1950   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1953 
1954   A->info.mallocs    += a->reallocs;
1955   a->reallocs         = 0;
1956   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1957   a->rmax             = rmax;
1958 
1959   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 /*
1964    This function returns an array of flags which indicate the locations of contiguous
1965    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1966    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1967    Assume: sizes should be long enough to hold all the values.
1968 */
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1972 {
1973   PetscInt  i,j,k,row;
1974   PetscBool flg;
1975 
1976   PetscFunctionBegin;
1977   for (i=0,j=0; i<n; j++) {
1978     row = idx[i];
1979     if (row%bs!=0) { /* Not the begining of a block */
1980       sizes[j] = 1;
1981       i++;
1982     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1983       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1984       i++;
1985     } else { /* Begining of the block, so check if the complete block exists */
1986       flg = PETSC_TRUE;
1987       for (k=1; k<bs; k++) {
1988         if (row+k != idx[i+k]) { /* break in the block */
1989           flg = PETSC_FALSE;
1990           break;
1991         }
1992       }
1993       if (flg) { /* No break in the bs */
1994         sizes[j] = bs;
1995         i       += bs;
1996       } else {
1997         sizes[j] = 1;
1998         i++;
1999       }
2000     }
2001   }
2002   *bs_max = j;
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2008 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2009 {
2010   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2011   PetscErrorCode    ierr;
2012   PetscInt          i,j,k,count,*rows;
2013   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2014   PetscScalar       zero = 0.0;
2015   MatScalar         *aa;
2016   const PetscScalar *xx;
2017   PetscScalar       *bb;
2018 
2019   PetscFunctionBegin;
2020   /* fix right hand side if needed */
2021   if (x && b) {
2022     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2023     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2024     for (i=0; i<is_n; i++) {
2025       bb[is_idx[i]] = diag*xx[is_idx[i]];
2026     }
2027     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2028     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2029   }
2030 
2031   /* Make a copy of the IS and  sort it */
2032   /* allocate memory for rows,sizes */
2033   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2034 
2035   /* copy IS values to rows, and sort them */
2036   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2037   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2038 
2039   if (baij->keepnonzeropattern) {
2040     for (i=0; i<is_n; i++) sizes[i] = 1;
2041     bs_max          = is_n;
2042   } else {
2043     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2044     A->nonzerostate++;
2045   }
2046 
2047   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2048     row = rows[j];
2049     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2050     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2051     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2052     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2053       if (diag != (PetscScalar)0.0) {
2054         if (baij->ilen[row/bs] > 0) {
2055           baij->ilen[row/bs]       = 1;
2056           baij->j[baij->i[row/bs]] = row/bs;
2057 
2058           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2059         }
2060         /* Now insert all the diagonal values for this bs */
2061         for (k=0; k<bs; k++) {
2062           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2063         }
2064       } else { /* (diag == 0.0) */
2065         baij->ilen[row/bs] = 0;
2066       } /* end (diag == 0.0) */
2067     } else { /* (sizes[i] != bs) */
2068 #if defined(PETSC_USE_DEBUG)
2069       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2070 #endif
2071       for (k=0; k<count; k++) {
2072         aa[0] =  zero;
2073         aa   += bs;
2074       }
2075       if (diag != (PetscScalar)0.0) {
2076         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2077       }
2078     }
2079   }
2080 
2081   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2082   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2088 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2089 {
2090   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2091   PetscErrorCode    ierr;
2092   PetscInt          i,j,k,count;
2093   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2094   PetscScalar       zero = 0.0;
2095   MatScalar         *aa;
2096   const PetscScalar *xx;
2097   PetscScalar       *bb;
2098   PetscBool         *zeroed,vecs = PETSC_FALSE;
2099 
2100   PetscFunctionBegin;
2101   /* fix right hand side if needed */
2102   if (x && b) {
2103     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2104     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2105     vecs = PETSC_TRUE;
2106   }
2107 
2108   /* zero the columns */
2109   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2110   for (i=0; i<is_n; i++) {
2111     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2112     zeroed[is_idx[i]] = PETSC_TRUE;
2113   }
2114   for (i=0; i<A->rmap->N; i++) {
2115     if (!zeroed[i]) {
2116       row = i/bs;
2117       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2118         for (k=0; k<bs; k++) {
2119           col = bs*baij->j[j] + k;
2120           if (zeroed[col]) {
2121             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2122             if (vecs) bb[i] -= aa[0]*xx[col];
2123             aa[0] = 0.0;
2124           }
2125         }
2126       }
2127     } else if (vecs) bb[i] = diag*xx[i];
2128   }
2129   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2130   if (vecs) {
2131     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2132     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2133   }
2134 
2135   /* zero the rows */
2136   for (i=0; i<is_n; i++) {
2137     row   = is_idx[i];
2138     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2139     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2140     for (k=0; k<count; k++) {
2141       aa[0] =  zero;
2142       aa   += bs;
2143     }
2144     if (diag != (PetscScalar)0.0) {
2145       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2146     }
2147   }
2148   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2154 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2155 {
2156   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2157   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2158   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2159   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2160   PetscErrorCode ierr;
2161   PetscInt       ridx,cidx,bs2=a->bs2;
2162   PetscBool      roworiented=a->roworiented;
2163   MatScalar      *ap,value,*aa=a->a,*bap;
2164 
2165   PetscFunctionBegin;
2166   for (k=0; k<m; k++) { /* loop over added rows */
2167     row  = im[k];
2168     brow = row/bs;
2169     if (row < 0) continue;
2170 #if defined(PETSC_USE_DEBUG)
2171     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2172 #endif
2173     rp   = aj + ai[brow];
2174     ap   = aa + bs2*ai[brow];
2175     rmax = imax[brow];
2176     nrow = ailen[brow];
2177     low  = 0;
2178     high = nrow;
2179     for (l=0; l<n; l++) { /* loop over added columns */
2180       if (in[l] < 0) continue;
2181 #if defined(PETSC_USE_DEBUG)
2182       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2183 #endif
2184       col  = in[l]; bcol = col/bs;
2185       ridx = row % bs; cidx = col % bs;
2186       if (roworiented) {
2187         value = v[l + k*n];
2188       } else {
2189         value = v[k + l*m];
2190       }
2191       if (col <= lastcol) low = 0; else high = nrow;
2192       lastcol = col;
2193       while (high-low > 7) {
2194         t = (low+high)/2;
2195         if (rp[t] > bcol) high = t;
2196         else              low  = t;
2197       }
2198       for (i=low; i<high; i++) {
2199         if (rp[i] > bcol) break;
2200         if (rp[i] == bcol) {
2201           bap = ap +  bs2*i + bs*cidx + ridx;
2202           if (is == ADD_VALUES) *bap += value;
2203           else                  *bap  = value;
2204           goto noinsert1;
2205         }
2206       }
2207       if (nonew == 1) goto noinsert1;
2208       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2209       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2210       N = nrow++ - 1; high++;
2211       /* shift up all the later entries in this row */
2212       for (ii=N; ii>=i; ii--) {
2213         rp[ii+1] = rp[ii];
2214         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2215       }
2216       if (N>=i) {
2217         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2218       }
2219       rp[i]                      = bcol;
2220       ap[bs2*i + bs*cidx + ridx] = value;
2221       a->nz++;
2222       A->nonzerostate++;
2223 noinsert1:;
2224       low = i;
2225     }
2226     ailen[brow] = nrow;
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2233 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2234 {
2235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2236   Mat            outA;
2237   PetscErrorCode ierr;
2238   PetscBool      row_identity,col_identity;
2239 
2240   PetscFunctionBegin;
2241   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2242   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2243   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2244   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2245 
2246   outA            = inA;
2247   inA->factortype = MAT_FACTOR_LU;
2248 
2249   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2250 
2251   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2252   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2253   a->row = row;
2254   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2255   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2256   a->col = col;
2257 
2258   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2259   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2260   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2261   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2262 
2263   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2264   if (!a->solve_work) {
2265     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2266     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2267   }
2268   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2274 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2275 {
2276   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2277   PetscInt    i,nz,mbs;
2278 
2279   PetscFunctionBegin;
2280   nz  = baij->maxnz;
2281   mbs = baij->mbs;
2282   for (i=0; i<nz; i++) {
2283     baij->j[i] = indices[i];
2284   }
2285   baij->nz = nz;
2286   for (i=0; i<mbs; i++) {
2287     baij->ilen[i] = baij->imax[i];
2288   }
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2294 /*@
2295     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2296        in the matrix.
2297 
2298   Input Parameters:
2299 +  mat - the SeqBAIJ matrix
2300 -  indices - the column indices
2301 
2302   Level: advanced
2303 
2304   Notes:
2305     This can be called if you have precomputed the nonzero structure of the
2306   matrix and want to provide it to the matrix object to improve the performance
2307   of the MatSetValues() operation.
2308 
2309     You MUST have set the correct numbers of nonzeros per row in the call to
2310   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2311 
2312     MUST be called before any calls to MatSetValues();
2313 
2314 @*/
2315 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2316 {
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2321   PetscValidPointer(indices,2);
2322   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2328 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2329 {
2330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2331   PetscErrorCode ierr;
2332   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2333   PetscReal      atmp;
2334   PetscScalar    *x,zero = 0.0;
2335   MatScalar      *aa;
2336   PetscInt       ncols,brow,krow,kcol;
2337 
2338   PetscFunctionBegin;
2339   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2340   bs  = A->rmap->bs;
2341   aa  = a->a;
2342   ai  = a->i;
2343   aj  = a->j;
2344   mbs = a->mbs;
2345 
2346   ierr = VecSet(v,zero);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2348   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2349   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2350   for (i=0; i<mbs; i++) {
2351     ncols = ai[1] - ai[0]; ai++;
2352     brow  = bs*i;
2353     for (j=0; j<ncols; j++) {
2354       for (kcol=0; kcol<bs; kcol++) {
2355         for (krow=0; krow<bs; krow++) {
2356           atmp = PetscAbsScalar(*aa);aa++;
2357           row  = brow + krow;   /* row index */
2358           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2359         }
2360       }
2361       aj++;
2362     }
2363   }
2364   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatCopy_SeqBAIJ"
2370 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2371 {
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   /* If the two matrices have the same copy implementation, use fast copy. */
2376   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2377     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2378     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2379     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2380 
2381     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2382     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2383     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2384   } else {
2385     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2392 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2393 {
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2403 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2404 {
2405   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2406 
2407   PetscFunctionBegin;
2408   *array = a->a;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2414 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2415 {
2416   PetscFunctionBegin;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2423 {
2424   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2426   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   /* Set the number of nonzeros in the new matrix */
2431   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2437 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2438 {
2439   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2440   PetscErrorCode ierr;
2441   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2442   PetscBLASInt   one=1;
2443 
2444   PetscFunctionBegin;
2445   if (str == SAME_NONZERO_PATTERN) {
2446     PetscScalar  alpha = a;
2447     PetscBLASInt bnz;
2448     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2449     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2450     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2451   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2452     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2453   } else {
2454     Mat      B;
2455     PetscInt *nnz;
2456     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2457     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2458     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2459     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2460     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2461     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2462     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2463     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2464     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2465     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2466     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2467     ierr = PetscFree(nnz);CHKERRQ(ierr);
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2474 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2475 {
2476   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2477   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2478   MatScalar   *aa = a->a;
2479 
2480   PetscFunctionBegin;
2481   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2487 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2488 {
2489   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2490   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2491   MatScalar   *aa = a->a;
2492 
2493   PetscFunctionBegin;
2494   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2500 /*
2501     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2502 */
2503 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2504 {
2505   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2506   PetscErrorCode ierr;
2507   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2508   PetscInt       nz = a->i[m],row,*jj,mr,col;
2509 
2510   PetscFunctionBegin;
2511   *nn = n;
2512   if (!ia) PetscFunctionReturn(0);
2513   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2514   else {
2515     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2516     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2517     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2518     jj   = a->j;
2519     for (i=0; i<nz; i++) {
2520       collengths[jj[i]]++;
2521     }
2522     cia[0] = oshift;
2523     for (i=0; i<n; i++) {
2524       cia[i+1] = cia[i] + collengths[i];
2525     }
2526     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2527     jj   = a->j;
2528     for (row=0; row<m; row++) {
2529       mr = a->i[row+1] - a->i[row];
2530       for (i=0; i<mr; i++) {
2531         col = *jj++;
2532 
2533         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2534       }
2535     }
2536     ierr = PetscFree(collengths);CHKERRQ(ierr);
2537     *ia  = cia; *ja = cja;
2538   }
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2544 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2545 {
2546   PetscErrorCode ierr;
2547 
2548   PetscFunctionBegin;
2549   if (!ia) PetscFunctionReturn(0);
2550   ierr = PetscFree(*ia);CHKERRQ(ierr);
2551   ierr = PetscFree(*ja);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 /*
2556  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2557  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2558  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2559  */
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2562 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2563 {
2564   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2565   PetscErrorCode ierr;
2566   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2567   PetscInt       nz = a->i[m],row,*jj,mr,col;
2568   PetscInt       *cspidx;
2569 
2570   PetscFunctionBegin;
2571   *nn = n;
2572   if (!ia) PetscFunctionReturn(0);
2573 
2574   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2575   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2576   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2577   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2578   jj   = a->j;
2579   for (i=0; i<nz; i++) {
2580     collengths[jj[i]]++;
2581   }
2582   cia[0] = oshift;
2583   for (i=0; i<n; i++) {
2584     cia[i+1] = cia[i] + collengths[i];
2585   }
2586   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2587   jj   = a->j;
2588   for (row=0; row<m; row++) {
2589     mr = a->i[row+1] - a->i[row];
2590     for (i=0; i<mr; i++) {
2591       col = *jj++;
2592       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2593       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2594     }
2595   }
2596   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2597   *ia    = cia; *ja = cja;
2598   *spidx = cspidx;
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2604 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2605 {
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2610   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatShift_SeqBAIJ"
2616 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2617 {
2618   PetscErrorCode ierr;
2619   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2620 
2621   PetscFunctionBegin;
2622   if (!Y->preallocated || !aij->nz) {
2623     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2624   }
2625   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2626   PetscFunctionReturn(0);
2627 }
2628 
2629 /* -------------------------------------------------------------------*/
2630 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2631                                        MatGetRow_SeqBAIJ,
2632                                        MatRestoreRow_SeqBAIJ,
2633                                        MatMult_SeqBAIJ_N,
2634                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2635                                        MatMultTranspose_SeqBAIJ,
2636                                        MatMultTransposeAdd_SeqBAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                /* 10*/ 0,
2641                                        MatLUFactor_SeqBAIJ,
2642                                        0,
2643                                        0,
2644                                        MatTranspose_SeqBAIJ,
2645                                /* 15*/ MatGetInfo_SeqBAIJ,
2646                                        MatEqual_SeqBAIJ,
2647                                        MatGetDiagonal_SeqBAIJ,
2648                                        MatDiagonalScale_SeqBAIJ,
2649                                        MatNorm_SeqBAIJ,
2650                                /* 20*/ 0,
2651                                        MatAssemblyEnd_SeqBAIJ,
2652                                        MatSetOption_SeqBAIJ,
2653                                        MatZeroEntries_SeqBAIJ,
2654                                /* 24*/ MatZeroRows_SeqBAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                /* 29*/ MatSetUp_SeqBAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                /* 34*/ MatDuplicate_SeqBAIJ,
2665                                        0,
2666                                        0,
2667                                        MatILUFactor_SeqBAIJ,
2668                                        0,
2669                                /* 39*/ MatAXPY_SeqBAIJ,
2670                                        MatGetSubMatrices_SeqBAIJ,
2671                                        MatIncreaseOverlap_SeqBAIJ,
2672                                        MatGetValues_SeqBAIJ,
2673                                        MatCopy_SeqBAIJ,
2674                                /* 44*/ 0,
2675                                        MatScale_SeqBAIJ,
2676                                        MatShift_SeqBAIJ,
2677                                        0,
2678                                        MatZeroRowsColumns_SeqBAIJ,
2679                                /* 49*/ 0,
2680                                        MatGetRowIJ_SeqBAIJ,
2681                                        MatRestoreRowIJ_SeqBAIJ,
2682                                        MatGetColumnIJ_SeqBAIJ,
2683                                        MatRestoreColumnIJ_SeqBAIJ,
2684                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        MatSetValuesBlocked_SeqBAIJ,
2689                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2690                                        MatDestroy_SeqBAIJ,
2691                                        MatView_SeqBAIJ,
2692                                        0,
2693                                        0,
2694                                /* 64*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2700                                        0,
2701                                        MatConvert_Basic,
2702                                        0,
2703                                        0,
2704                                /* 74*/ 0,
2705                                        MatFDColoringApply_BAIJ,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                /* 79*/ 0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        MatLoad_SeqBAIJ,
2714                                /* 84*/ 0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                /* 89*/ 0,
2720                                        0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                /* 94*/ 0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                /* 99*/ 0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                /*104*/ 0,
2735                                        MatRealPart_SeqBAIJ,
2736                                        MatImaginaryPart_SeqBAIJ,
2737                                        0,
2738                                        0,
2739                                /*109*/ 0,
2740                                        0,
2741                                        0,
2742                                        0,
2743                                        MatMissingDiagonal_SeqBAIJ,
2744                                /*114*/ 0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                /*119*/ 0,
2750                                        0,
2751                                        MatMultHermitianTranspose_SeqBAIJ,
2752                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2753                                        0,
2754                                /*124*/ 0,
2755                                        0,
2756                                        MatInvertBlockDiagonal_SeqBAIJ,
2757                                        0,
2758                                        0,
2759                                /*129*/ 0,
2760                                        0,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                /*134*/ 0,
2765                                        0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                /*139*/ 0,
2770                                        0,
2771                                        0,
2772                                        MatFDColoringSetUp_SeqXAIJ,
2773                                        0,
2774                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2775 };
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2779 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2780 {
2781   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2782   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2787 
2788   /* allocate space for values if not already there */
2789   if (!aij->saved_values) {
2790     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2791     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2792   }
2793 
2794   /* copy values over */
2795   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2801 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2802 {
2803   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2804   PetscErrorCode ierr;
2805   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2806 
2807   PetscFunctionBegin;
2808   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2809   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2810 
2811   /* copy values over */
2812   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2817 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2818 
2819 #undef __FUNCT__
2820 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2821 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2822 {
2823   Mat_SeqBAIJ    *b;
2824   PetscErrorCode ierr;
2825   PetscInt       i,mbs,nbs,bs2;
2826   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2827 
2828   PetscFunctionBegin;
2829   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2830   if (nz == MAT_SKIP_ALLOCATION) {
2831     skipallocation = PETSC_TRUE;
2832     nz             = 0;
2833   }
2834 
2835   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2836   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2838   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2839 
2840   B->preallocated = PETSC_TRUE;
2841 
2842   mbs = B->rmap->n/bs;
2843   nbs = B->cmap->n/bs;
2844   bs2 = bs*bs;
2845 
2846   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2847 
2848   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2849   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2850   if (nnz) {
2851     for (i=0; i<mbs; i++) {
2852       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2853       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2854     }
2855   }
2856 
2857   b    = (Mat_SeqBAIJ*)B->data;
2858   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2859   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2860   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2861 
2862   if (!flg) {
2863     switch (bs) {
2864     case 1:
2865       B->ops->mult    = MatMult_SeqBAIJ_1;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2867       break;
2868     case 2:
2869       B->ops->mult    = MatMult_SeqBAIJ_2;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2871       break;
2872     case 3:
2873       B->ops->mult    = MatMult_SeqBAIJ_3;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2875       break;
2876     case 4:
2877       B->ops->mult    = MatMult_SeqBAIJ_4;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2879       break;
2880     case 5:
2881       B->ops->mult    = MatMult_SeqBAIJ_5;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2883       break;
2884     case 6:
2885       B->ops->mult    = MatMult_SeqBAIJ_6;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2887       break;
2888     case 7:
2889       B->ops->mult    = MatMult_SeqBAIJ_7;
2890       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2891       break;
2892     case 15:
2893       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2894       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2895       break;
2896     default:
2897       B->ops->mult    = MatMult_SeqBAIJ_N;
2898       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2899       break;
2900     }
2901   }
2902   B->ops->sor = MatSOR_SeqBAIJ;
2903   b->mbs = mbs;
2904   b->nbs = nbs;
2905   if (!skipallocation) {
2906     if (!b->imax) {
2907       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2908       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2909 
2910       b->free_imax_ilen = PETSC_TRUE;
2911     }
2912     /* b->ilen will count nonzeros in each block row so far. */
2913     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2914     if (!nnz) {
2915       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2916       else if (nz < 0) nz = 1;
2917       for (i=0; i<mbs; i++) b->imax[i] = nz;
2918       nz = nz*mbs;
2919     } else {
2920       nz = 0;
2921       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2922     }
2923 
2924     /* allocate the matrix space */
2925     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2926     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2927     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2928     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2929     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2930 
2931     b->singlemalloc = PETSC_TRUE;
2932     b->i[0]         = 0;
2933     for (i=1; i<mbs+1; i++) {
2934       b->i[i] = b->i[i-1] + b->imax[i-1];
2935     }
2936     b->free_a  = PETSC_TRUE;
2937     b->free_ij = PETSC_TRUE;
2938   } else {
2939     b->free_a  = PETSC_FALSE;
2940     b->free_ij = PETSC_FALSE;
2941   }
2942 
2943   b->bs2              = bs2;
2944   b->mbs              = mbs;
2945   b->nz               = 0;
2946   b->maxnz            = nz;
2947   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2948   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2954 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2955 {
2956   PetscInt       i,m,nz,nz_max=0,*nnz;
2957   PetscScalar    *values=0;
2958   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2959   PetscErrorCode ierr;
2960 
2961   PetscFunctionBegin;
2962   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2963   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2964   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2965   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2966   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2967   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2968   m    = B->rmap->n/bs;
2969 
2970   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2971   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2972   for (i=0; i<m; i++) {
2973     nz = ii[i+1]- ii[i];
2974     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2975     nz_max = PetscMax(nz_max, nz);
2976     nnz[i] = nz;
2977   }
2978   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2979   ierr = PetscFree(nnz);CHKERRQ(ierr);
2980 
2981   values = (PetscScalar*)V;
2982   if (!values) {
2983     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2984   }
2985   for (i=0; i<m; i++) {
2986     PetscInt          ncols  = ii[i+1] - ii[i];
2987     const PetscInt    *icols = jj + ii[i];
2988     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989     if (!roworiented) {
2990       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2991     } else {
2992       PetscInt j;
2993       for (j=0; j<ncols; j++) {
2994         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2995         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2996       }
2997     }
2998   }
2999   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3000   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3001   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 /*MC
3007    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3008    block sparse compressed row format.
3009 
3010    Options Database Keys:
3011 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3012 
3013   Level: beginner
3014 
3015 .seealso: MatCreateSeqBAIJ()
3016 M*/
3017 
3018 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatCreate_SeqBAIJ"
3022 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3023 {
3024   PetscErrorCode ierr;
3025   PetscMPIInt    size;
3026   Mat_SeqBAIJ    *b;
3027 
3028   PetscFunctionBegin;
3029   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3030   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3031 
3032   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3033   B->data = (void*)b;
3034   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3035 
3036   b->row          = 0;
3037   b->col          = 0;
3038   b->icol         = 0;
3039   b->reallocs     = 0;
3040   b->saved_values = 0;
3041 
3042   b->roworiented        = PETSC_TRUE;
3043   b->nonew              = 0;
3044   b->diag               = 0;
3045   B->spptr              = 0;
3046   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3047   b->keepnonzeropattern = PETSC_FALSE;
3048 
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 #undef __FUNCT__
3064 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3065 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3066 {
3067   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3068   PetscErrorCode ierr;
3069   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3070 
3071   PetscFunctionBegin;
3072   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3073 
3074   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3075     c->imax           = a->imax;
3076     c->ilen           = a->ilen;
3077     c->free_imax_ilen = PETSC_FALSE;
3078   } else {
3079     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3080     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3081     for (i=0; i<mbs; i++) {
3082       c->imax[i] = a->imax[i];
3083       c->ilen[i] = a->ilen[i];
3084     }
3085     c->free_imax_ilen = PETSC_TRUE;
3086   }
3087 
3088   /* allocate the matrix space */
3089   if (mallocmatspace) {
3090     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3091       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3092       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3093 
3094       c->i            = a->i;
3095       c->j            = a->j;
3096       c->singlemalloc = PETSC_FALSE;
3097       c->free_a       = PETSC_TRUE;
3098       c->free_ij      = PETSC_FALSE;
3099       c->parent       = A;
3100       C->preallocated = PETSC_TRUE;
3101       C->assembled    = PETSC_TRUE;
3102 
3103       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3104       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3105       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3106     } else {
3107       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3108       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3109 
3110       c->singlemalloc = PETSC_TRUE;
3111       c->free_a       = PETSC_TRUE;
3112       c->free_ij      = PETSC_TRUE;
3113 
3114       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3115       if (mbs > 0) {
3116         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3117         if (cpvalues == MAT_COPY_VALUES) {
3118           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3119         } else {
3120           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3121         }
3122       }
3123       C->preallocated = PETSC_TRUE;
3124       C->assembled    = PETSC_TRUE;
3125     }
3126   }
3127 
3128   c->roworiented = a->roworiented;
3129   c->nonew       = a->nonew;
3130 
3131   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3132   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3133 
3134   c->bs2         = a->bs2;
3135   c->mbs         = a->mbs;
3136   c->nbs         = a->nbs;
3137 
3138   if (a->diag) {
3139     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3140       c->diag      = a->diag;
3141       c->free_diag = PETSC_FALSE;
3142     } else {
3143       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3144       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3145       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3146       c->free_diag = PETSC_TRUE;
3147     }
3148   } else c->diag = 0;
3149 
3150   c->nz         = a->nz;
3151   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3152   c->solve_work = NULL;
3153   c->mult_work  = NULL;
3154   c->sor_workt  = NULL;
3155   c->sor_work   = NULL;
3156 
3157   c->compressedrow.use   = a->compressedrow.use;
3158   c->compressedrow.nrows = a->compressedrow.nrows;
3159   if (a->compressedrow.use) {
3160     i    = a->compressedrow.nrows;
3161     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3162     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3163     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3164     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3165   } else {
3166     c->compressedrow.use    = PETSC_FALSE;
3167     c->compressedrow.i      = NULL;
3168     c->compressedrow.rindex = NULL;
3169   }
3170   C->nonzerostate = A->nonzerostate;
3171 
3172   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3173   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3174   PetscFunctionReturn(0);
3175 }
3176 
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3179 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3180 {
3181   PetscErrorCode ierr;
3182 
3183   PetscFunctionBegin;
3184   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3185   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3186   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3187   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "MatLoad_SeqBAIJ"
3193 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3194 {
3195   Mat_SeqBAIJ    *a;
3196   PetscErrorCode ierr;
3197   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3198   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3199   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3200   PetscInt       *masked,nmask,tmp,bs2,ishift;
3201   PetscMPIInt    size;
3202   int            fd;
3203   PetscScalar    *aa;
3204   MPI_Comm       comm;
3205 
3206   PetscFunctionBegin;
3207   /* force binary viewer to load .info file if it has not yet done so */
3208   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3209   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3210   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3211   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3212   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3213   if (bs < 0) bs = 1;
3214   bs2  = bs*bs;
3215 
3216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3217   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3218   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3219   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3220   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3221   M = header[1]; N = header[2]; nz = header[3];
3222 
3223   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3224   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3225 
3226   /*
3227      This code adds extra rows to make sure the number of rows is
3228     divisible by the blocksize
3229   */
3230   mbs        = M/bs;
3231   extra_rows = bs - M + bs*(mbs);
3232   if (extra_rows == bs) extra_rows = 0;
3233   else mbs++;
3234   if (extra_rows) {
3235     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3236   }
3237 
3238   /* Set global sizes if not already set */
3239   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3240     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3241   } else { /* Check if the matrix global sizes are correct */
3242     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3243     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3244       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3245     }
3246     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3247   }
3248 
3249   /* read in row lengths */
3250   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3251   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3252   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3253 
3254   /* read in column indices */
3255   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3256   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3257   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3258 
3259   /* loop over row lengths determining block row lengths */
3260   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3261   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3262   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3263   rowcount = 0;
3264   nzcount  = 0;
3265   for (i=0; i<mbs; i++) {
3266     nmask = 0;
3267     for (j=0; j<bs; j++) {
3268       kmax = rowlengths[rowcount];
3269       for (k=0; k<kmax; k++) {
3270         tmp = jj[nzcount++]/bs;
3271         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3272       }
3273       rowcount++;
3274     }
3275     browlengths[i] += nmask;
3276     /* zero out the mask elements we set */
3277     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3278   }
3279 
3280   /* Do preallocation  */
3281   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3282   a    = (Mat_SeqBAIJ*)newmat->data;
3283 
3284   /* set matrix "i" values */
3285   a->i[0] = 0;
3286   for (i=1; i<= mbs; i++) {
3287     a->i[i]      = a->i[i-1] + browlengths[i-1];
3288     a->ilen[i-1] = browlengths[i-1];
3289   }
3290   a->nz = 0;
3291   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3292 
3293   /* read in nonzero values */
3294   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3295   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3296   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3297 
3298   /* set "a" and "j" values into matrix */
3299   nzcount = 0; jcount = 0;
3300   for (i=0; i<mbs; i++) {
3301     nzcountb = nzcount;
3302     nmask    = 0;
3303     for (j=0; j<bs; j++) {
3304       kmax = rowlengths[i*bs+j];
3305       for (k=0; k<kmax; k++) {
3306         tmp = jj[nzcount++]/bs;
3307         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3308       }
3309     }
3310     /* sort the masked values */
3311     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3312 
3313     /* set "j" values into matrix */
3314     maskcount = 1;
3315     for (j=0; j<nmask; j++) {
3316       a->j[jcount++]  = masked[j];
3317       mask[masked[j]] = maskcount++;
3318     }
3319     /* set "a" values into matrix */
3320     ishift = bs2*a->i[i];
3321     for (j=0; j<bs; j++) {
3322       kmax = rowlengths[i*bs+j];
3323       for (k=0; k<kmax; k++) {
3324         tmp       = jj[nzcountb]/bs;
3325         block     = mask[tmp] - 1;
3326         point     = jj[nzcountb] - bs*tmp;
3327         idx       = ishift + bs2*block + j + bs*point;
3328         a->a[idx] = (MatScalar)aa[nzcountb++];
3329       }
3330     }
3331     /* zero out the mask elements we set */
3332     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3333   }
3334   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3335 
3336   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3337   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3338   ierr = PetscFree(aa);CHKERRQ(ierr);
3339   ierr = PetscFree(jj);CHKERRQ(ierr);
3340   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3341 
3342   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3343   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3344   PetscFunctionReturn(0);
3345 }
3346 
3347 #undef __FUNCT__
3348 #define __FUNCT__ "MatCreateSeqBAIJ"
3349 /*@C
3350    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3351    compressed row) format.  For good matrix assembly performance the
3352    user should preallocate the matrix storage by setting the parameter nz
3353    (or the array nnz).  By setting these parameters accurately, performance
3354    during matrix assembly can be increased by more than a factor of 50.
3355 
3356    Collective on MPI_Comm
3357 
3358    Input Parameters:
3359 +  comm - MPI communicator, set to PETSC_COMM_SELF
3360 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3361           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3362 .  m - number of rows
3363 .  n - number of columns
3364 .  nz - number of nonzero blocks  per block row (same for all rows)
3365 -  nnz - array containing the number of nonzero blocks in the various block rows
3366          (possibly different for each block row) or NULL
3367 
3368    Output Parameter:
3369 .  A - the matrix
3370 
3371    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3372    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3373    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3374 
3375    Options Database Keys:
3376 .   -mat_no_unroll - uses code that does not unroll the loops in the
3377                      block calculations (much slower)
3378 .    -mat_block_size - size of the blocks to use
3379 
3380    Level: intermediate
3381 
3382    Notes:
3383    The number of rows and columns must be divisible by blocksize.
3384 
3385    If the nnz parameter is given then the nz parameter is ignored
3386 
3387    A nonzero block is any block that as 1 or more nonzeros in it
3388 
3389    The block AIJ format is fully compatible with standard Fortran 77
3390    storage.  That is, the stored row and column indices can begin at
3391    either one (as in Fortran) or zero.  See the users' manual for details.
3392 
3393    Specify the preallocated storage with either nz or nnz (not both).
3394    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3395    allocation.  See Users-Manual: ch_mat for details.
3396    matrices.
3397 
3398 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3399 @*/
3400 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3401 {
3402   PetscErrorCode ierr;
3403 
3404   PetscFunctionBegin;
3405   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3406   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3407   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3408   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3414 /*@C
3415    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3416    per row in the matrix. For good matrix assembly performance the
3417    user should preallocate the matrix storage by setting the parameter nz
3418    (or the array nnz).  By setting these parameters accurately, performance
3419    during matrix assembly can be increased by more than a factor of 50.
3420 
3421    Collective on MPI_Comm
3422 
3423    Input Parameters:
3424 +  B - the matrix
3425 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3426           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3427 .  nz - number of block nonzeros per block row (same for all rows)
3428 -  nnz - array containing the number of block nonzeros in the various block rows
3429          (possibly different for each block row) or NULL
3430 
3431    Options Database Keys:
3432 .   -mat_no_unroll - uses code that does not unroll the loops in the
3433                      block calculations (much slower)
3434 .   -mat_block_size - size of the blocks to use
3435 
3436    Level: intermediate
3437 
3438    Notes:
3439    If the nnz parameter is given then the nz parameter is ignored
3440 
3441    You can call MatGetInfo() to get information on how effective the preallocation was;
3442    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3443    You can also run with the option -info and look for messages with the string
3444    malloc in them to see if additional memory allocation was needed.
3445 
3446    The block AIJ format is fully compatible with standard Fortran 77
3447    storage.  That is, the stored row and column indices can begin at
3448    either one (as in Fortran) or zero.  See the users' manual for details.
3449 
3450    Specify the preallocated storage with either nz or nnz (not both).
3451    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3452    allocation.  See Users-Manual: ch_mat for details.
3453 
3454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3455 @*/
3456 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3457 {
3458   PetscErrorCode ierr;
3459 
3460   PetscFunctionBegin;
3461   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3462   PetscValidType(B,1);
3463   PetscValidLogicalCollectiveInt(B,bs,2);
3464   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3470 /*@C
3471    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3472    (the default sequential PETSc format).
3473 
3474    Collective on MPI_Comm
3475 
3476    Input Parameters:
3477 +  B - the matrix
3478 .  i - the indices into j for the start of each local row (starts with zero)
3479 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3480 -  v - optional values in the matrix
3481 
3482    Level: developer
3483 
3484    Notes:
3485    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3486    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3487    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3488    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3489    block column and the second index is over columns within a block.
3490 
3491 .keywords: matrix, aij, compressed row, sparse
3492 
3493 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3494 @*/
3495 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3496 {
3497   PetscErrorCode ierr;
3498 
3499   PetscFunctionBegin;
3500   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3501   PetscValidType(B,1);
3502   PetscValidLogicalCollectiveInt(B,bs,2);
3503   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3510 /*@
3511      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3512 
3513      Collective on MPI_Comm
3514 
3515    Input Parameters:
3516 +  comm - must be an MPI communicator of size 1
3517 .  bs - size of block
3518 .  m - number of rows
3519 .  n - number of columns
3520 .  i - row indices
3521 .  j - column indices
3522 -  a - matrix values
3523 
3524    Output Parameter:
3525 .  mat - the matrix
3526 
3527    Level: advanced
3528 
3529    Notes:
3530        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3531     once the matrix is destroyed
3532 
3533        You cannot set new nonzero locations into this matrix, that will generate an error.
3534 
3535        The i and j indices are 0 based
3536 
3537        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3538 
3539       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3540       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3541       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3542       with column-major ordering within blocks.
3543 
3544 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3545 
3546 @*/
3547 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3548 {
3549   PetscErrorCode ierr;
3550   PetscInt       ii;
3551   Mat_SeqBAIJ    *baij;
3552 
3553   PetscFunctionBegin;
3554   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3555   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3556 
3557   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3558   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3559   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3560   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3561   baij = (Mat_SeqBAIJ*)(*mat)->data;
3562   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3563   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3564 
3565   baij->i = i;
3566   baij->j = j;
3567   baij->a = a;
3568 
3569   baij->singlemalloc = PETSC_FALSE;
3570   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3571   baij->free_a       = PETSC_FALSE;
3572   baij->free_ij      = PETSC_FALSE;
3573 
3574   for (ii=0; ii<m; ii++) {
3575     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3576 #if defined(PETSC_USE_DEBUG)
3577     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3578 #endif
3579   }
3580 #if defined(PETSC_USE_DEBUG)
3581   for (ii=0; ii<baij->i[m]; ii++) {
3582     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3583     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3584   }
3585 #endif
3586 
3587   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3588   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3589   PetscFunctionReturn(0);
3590 }
3591 
3592 #undef __FUNCT__
3593 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3594 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3595 {
3596   PetscErrorCode ierr;
3597 
3598   PetscFunctionBegin;
3599   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3600   PetscFunctionReturn(0);
3601 }
3602