xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3b12b3d80f31bfd82eef2f2b56f11b59a25d1965)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
952       }
953       if (N >= i) {
954         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       for (ii=N; ii>=i; ii--) {
1019         rp[ii+1] = rp[ii];
1020         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1021       }
1022       if (N>=i) {
1023         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1024       }
1025       rp[i]                    = bcol;
1026       ap[16*i + 4*cidx + ridx] = value;
1027 noinsert1:;
1028       low = i;
1029     }
1030     ailen[brow] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 
1035 /*
1036      Checks for missing diagonals
1037 */
1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       *diag,*ii = a->i,i;
1043 
1044   PetscFunctionBegin;
1045   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1046   *missing = PETSC_FALSE;
1047   if (A->rmap->n > 0 && !ii) {
1048     *missing = PETSC_TRUE;
1049     if (d) *d = 0;
1050     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1051   } else {
1052     PetscInt n;
1053     n = PetscMin(a->mbs, a->nbs);
1054     diag = a->diag;
1055     for (i=0; i<n; i++) {
1056       if (diag[i] >= ii[i+1]) {
1057         *missing = PETSC_TRUE;
1058         if (d) *d = i;
1059         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1060         break;
1061       }
1062     }
1063   }
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1068 {
1069   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscInt       i,j,m = a->mbs;
1072 
1073   PetscFunctionBegin;
1074   if (!a->diag) {
1075     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1076     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1077     a->free_diag = PETSC_TRUE;
1078   }
1079   for (i=0; i<m; i++) {
1080     a->diag[i] = a->i[i+1];
1081     for (j=a->i[i]; j<a->i[i+1]; j++) {
1082       if (a->j[j] == i) {
1083         a->diag[i] = j;
1084         break;
1085       }
1086     }
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 
1092 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1093 {
1094   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1095   PetscErrorCode ierr;
1096   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1097   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1098 
1099   PetscFunctionBegin;
1100   *nn = n;
1101   if (!ia) PetscFunctionReturn(0);
1102   if (symmetric) {
1103     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1104     nz   = tia[n];
1105   } else {
1106     tia = a->i; tja = a->j;
1107   }
1108 
1109   if (!blockcompressed && bs > 1) {
1110     (*nn) *= bs;
1111     /* malloc & create the natural set of indices */
1112     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1113     if (n) {
1114       (*ia)[0] = oshift;
1115       for (j=1; j<bs; j++) {
1116         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1117       }
1118     }
1119 
1120     for (i=1; i<n; i++) {
1121       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1122       for (j=1; j<bs; j++) {
1123         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1124       }
1125     }
1126     if (n) {
1127       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1128     }
1129 
1130     if (inja) {
1131       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1132       cnt = 0;
1133       for (i=0; i<n; i++) {
1134         for (j=0; j<bs; j++) {
1135           for (k=tia[i]; k<tia[i+1]; k++) {
1136             for (l=0; l<bs; l++) {
1137               (*ja)[cnt++] = bs*tja[k] + l;
1138             }
1139           }
1140         }
1141       }
1142     }
1143 
1144     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1145       ierr = PetscFree(tia);CHKERRQ(ierr);
1146       ierr = PetscFree(tja);CHKERRQ(ierr);
1147     }
1148   } else if (oshift == 1) {
1149     if (symmetric) {
1150       nz = tia[A->rmap->n/bs];
1151       /*  add 1 to i and j indices */
1152       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1153       *ia = tia;
1154       if (ja) {
1155         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1156         *ja = tja;
1157       }
1158     } else {
1159       nz = a->i[A->rmap->n/bs];
1160       /* malloc space and  add 1 to i and j indices */
1161       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1162       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1163       if (ja) {
1164         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1165         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1166       }
1167     }
1168   } else {
1169     *ia = tia;
1170     if (ja) *ja = tja;
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1189 {
1190   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194 #if defined(PETSC_USE_LOG)
1195   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1196 #endif
1197   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1198   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1199   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1200   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1202   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1204   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1205   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1206   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1207   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1208   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1209   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1210 
1211   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1212   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1213   ierr = PetscFree(A->data);CHKERRQ(ierr);
1214 
1215   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_HYPRE)
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235 {
1236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_ROW_ORIENTED:
1242     a->roworiented = flg;
1243     break;
1244   case MAT_KEEP_NONZERO_PATTERN:
1245     a->keepnonzeropattern = flg;
1246     break;
1247   case MAT_NEW_NONZERO_LOCATIONS:
1248     a->nonew = (flg ? 0 : 1);
1249     break;
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     a->nonew = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254     a->nonew = (flg ? -2 : 0);
1255     break;
1256   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257     a->nounused = (flg ? -1 : 0);
1258     break;
1259   case MAT_NEW_DIAGONALS:
1260   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261   case MAT_USE_HASH_TABLE:
1262     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1263     break;
1264   case MAT_SPD:
1265   case MAT_SYMMETRIC:
1266   case MAT_STRUCTURALLY_SYMMETRIC:
1267   case MAT_HERMITIAN:
1268   case MAT_SYMMETRY_ETERNAL:
1269   case MAT_SUBMAT_SINGLEIS:
1270   case MAT_STRUCTURE_ONLY:
1271     /* These options are handled directly by MatSetOption() */
1272     break;
1273   default:
1274     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1280 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1281 {
1282   PetscErrorCode ierr;
1283   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1284   MatScalar      *aa_i;
1285   PetscScalar    *v_i;
1286 
1287   PetscFunctionBegin;
1288   bs  = A->rmap->bs;
1289   bs2 = bs*bs;
1290   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1291 
1292   bn  = row/bs;   /* Block number */
1293   bp  = row % bs; /* Block Position */
1294   M   = ai[bn+1] - ai[bn];
1295   *nz = bs*M;
1296 
1297   if (v) {
1298     *v = 0;
1299     if (*nz) {
1300       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1301       for (i=0; i<M; i++) { /* for each block in the block row */
1302         v_i  = *v + i*bs;
1303         aa_i = aa + bs2*(ai[bn] + i);
1304         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1305       }
1306     }
1307   }
1308 
1309   if (idx) {
1310     *idx = 0;
1311     if (*nz) {
1312       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1313       for (i=0; i<M; i++) { /* for each block in the block row */
1314         idx_i = *idx + i*bs;
1315         itmp  = bs*aj[ai[bn] + i];
1316         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1317       }
1318     }
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334 {
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1339   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1344 {
1345   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1346   Mat            C;
1347   PetscErrorCode ierr;
1348   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1349   PetscInt       *rows,*cols,bs2=a->bs2;
1350   MatScalar      *array;
1351 
1352   PetscFunctionBegin;
1353   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1354     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1355 
1356     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1357     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1358     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1359     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1360     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1361     ierr = PetscFree(col);CHKERRQ(ierr);
1362   } else {
1363     C = *B;
1364   }
1365 
1366   array = a->a;
1367   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1368   for (i=0; i<mbs; i++) {
1369     cols[0] = i*bs;
1370     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1371     len = ai[i+1] - ai[i];
1372     for (j=0; j<len; j++) {
1373       rows[0] = (*aj++)*bs;
1374       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1375       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1376       array += bs2;
1377     }
1378   }
1379   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1380 
1381   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1382   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383 
1384   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1385     *B = C;
1386   } else {
1387     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1393 {
1394   PetscErrorCode ierr;
1395   Mat            Btrans;
1396 
1397   PetscFunctionBegin;
1398   *f   = PETSC_FALSE;
1399   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1400   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1401   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1406 {
1407   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1408   PetscErrorCode ierr;
1409   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1410   int            fd;
1411   PetscScalar    *aa;
1412   FILE           *file;
1413 
1414   PetscFunctionBegin;
1415   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1416   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1417   col_lens[0] = MAT_FILE_CLASSID;
1418 
1419   col_lens[1] = A->rmap->N;
1420   col_lens[2] = A->cmap->n;
1421   col_lens[3] = a->nz*bs2;
1422 
1423   /* store lengths of each row and write (including header) to file */
1424   count = 0;
1425   for (i=0; i<a->mbs; i++) {
1426     for (j=0; j<bs; j++) {
1427       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1428     }
1429   }
1430   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1431   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1432 
1433   /* store column indices (zero start index) */
1434   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       for (k=a->i[i]; k<a->i[i+1]; k++) {
1439         for (l=0; l<bs; l++) {
1440           jj[count++] = bs*a->j[k] + l;
1441         }
1442       }
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1446   ierr = PetscFree(jj);CHKERRQ(ierr);
1447 
1448   /* store nonzero values */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           aa[count++] = a->a[bs2*k + l*bs + j];
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(aa);CHKERRQ(ierr);
1462 
1463   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1464   if (file) {
1465     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1471 {
1472   PetscErrorCode ierr;
1473   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1474   PetscInt       i,bs = A->rmap->bs,k;
1475 
1476   PetscFunctionBegin;
1477   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1478   for (i=0; i<a->mbs; i++) {
1479     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1480     for (k=a->i[i]; k<a->i[i+1]; k++) {
1481       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1482     }
1483     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1484   }
1485   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1490 {
1491   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1492   PetscErrorCode    ierr;
1493   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1494   PetscViewerFormat format;
1495 
1496   PetscFunctionBegin;
1497   if (A->structure_only) {
1498     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1499     PetscFunctionReturn(0);
1500   }
1501 
1502   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1503   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1504     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1505   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1506     const char *matname;
1507     Mat        aij;
1508     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1509     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1510     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1511     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1512     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1513   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1514       PetscFunctionReturn(0);
1515   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1516     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1517     for (i=0; i<a->mbs; i++) {
1518       for (j=0; j<bs; j++) {
1519         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1520         for (k=a->i[i]; k<a->i[i+1]; k++) {
1521           for (l=0; l<bs; l++) {
1522 #if defined(PETSC_USE_COMPLEX)
1523             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1525                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1526             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1528                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1529             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1531             }
1532 #else
1533             if (a->a[bs2*k + l*bs + j] != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1535             }
1536 #endif
1537           }
1538         }
1539         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1540       }
1541     }
1542     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1543   } else {
1544     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1545     for (i=0; i<a->mbs; i++) {
1546       for (j=0; j<bs; j++) {
1547         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1548         for (k=a->i[i]; k<a->i[i+1]; k++) {
1549           for (l=0; l<bs; l++) {
1550 #if defined(PETSC_USE_COMPLEX)
1551             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1553                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1554             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1555               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1556                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1557             } else {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1559             }
1560 #else
1561             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1562 #endif
1563           }
1564         }
1565         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1566       }
1567     }
1568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1569   }
1570   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #include <petscdraw.h>
1575 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1576 {
1577   Mat               A = (Mat) Aa;
1578   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1579   PetscErrorCode    ierr;
1580   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1581   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1582   MatScalar         *aa;
1583   PetscViewer       viewer;
1584   PetscViewerFormat format;
1585 
1586   PetscFunctionBegin;
1587   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1588   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1589   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1590 
1591   /* loop over matrix elements drawing boxes */
1592 
1593   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1594     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1595     /* Blue for negative, Cyan for zero and  Red for positive */
1596     color = PETSC_DRAW_BLUE;
1597     for (i=0,row=0; i<mbs; i++,row+=bs) {
1598       for (j=a->i[i]; j<a->i[i+1]; j++) {
1599         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601         aa  = a->a + j*bs2;
1602         for (k=0; k<bs; k++) {
1603           for (l=0; l<bs; l++) {
1604             if (PetscRealPart(*aa++) >=  0.) continue;
1605             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1606           }
1607         }
1608       }
1609     }
1610     color = PETSC_DRAW_CYAN;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) != 0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     color = PETSC_DRAW_RED;
1625     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629         aa  = a->a + j*bs2;
1630         for (k=0; k<bs; k++) {
1631           for (l=0; l<bs; l++) {
1632             if (PetscRealPart(*aa++) <= 0.) continue;
1633             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1634           }
1635         }
1636       }
1637     }
1638     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1639   } else {
1640     /* use contour shading to indicate magnitude of values */
1641     /* first determine max of all nonzero values */
1642     PetscReal minv = 0.0, maxv = 0.0;
1643     PetscDraw popup;
1644 
1645     for (i=0; i<a->nz*a->bs2; i++) {
1646       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1647     }
1648     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1649     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1650     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1651 
1652     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1653     for (i=0,row=0; i<mbs; i++,row+=bs) {
1654       for (j=a->i[i]; j<a->i[i+1]; j++) {
1655         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657         aa  = a->a + j*bs2;
1658         for (k=0; k<bs; k++) {
1659           for (l=0; l<bs; l++) {
1660             MatScalar v = *aa++;
1661             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1662             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1663           }
1664         }
1665       }
1666     }
1667     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1673 {
1674   PetscErrorCode ierr;
1675   PetscReal      xl,yl,xr,yr,w,h;
1676   PetscDraw      draw;
1677   PetscBool      isnull;
1678 
1679   PetscFunctionBegin;
1680   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1681   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1682   if (isnull) PetscFunctionReturn(0);
1683 
1684   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1685   xr  += w;          yr += h;        xl = -w;     yl = -h;
1686   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1687   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1688   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1689   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1690   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1695 {
1696   PetscErrorCode ierr;
1697   PetscBool      iascii,isbinary,isdraw;
1698 
1699   PetscFunctionBegin;
1700   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1701   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1702   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1703   if (iascii) {
1704     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1705   } else if (isbinary) {
1706     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1707   } else if (isdraw) {
1708     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1709   } else {
1710     Mat B;
1711     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1712     ierr = MatView(B,viewer);CHKERRQ(ierr);
1713     ierr = MatDestroy(&B);CHKERRQ(ierr);
1714   }
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 
1719 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1720 {
1721   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1722   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1723   PetscInt    *ai = a->i,*ailen = a->ilen;
1724   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1725   MatScalar   *ap,*aa = a->a;
1726 
1727   PetscFunctionBegin;
1728   for (k=0; k<m; k++) { /* loop over rows */
1729     row = im[k]; brow = row/bs;
1730     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1731     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1732     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1733     nrow = ailen[brow];
1734     for (l=0; l<n; l++) { /* loop over columns */
1735       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1736       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1737       col  = in[l];
1738       bcol = col/bs;
1739       cidx = col%bs;
1740       ridx = row%bs;
1741       high = nrow;
1742       low  = 0; /* assume unsorted */
1743       while (high-low > 5) {
1744         t = (low+high)/2;
1745         if (rp[t] > bcol) high = t;
1746         else             low  = t;
1747       }
1748       for (i=low; i<high; i++) {
1749         if (rp[i] > bcol) break;
1750         if (rp[i] == bcol) {
1751           *v++ = ap[bs2*i+bs*cidx+ridx];
1752           goto finished;
1753         }
1754       }
1755       *v++ = 0.0;
1756 finished:;
1757     }
1758   }
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1763 {
1764   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1765   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1766   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1767   PetscErrorCode    ierr;
1768   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1769   PetscBool         roworiented=a->roworiented;
1770   const PetscScalar *value     = v;
1771   MatScalar         *ap=NULL,*aa = a->a,*bap;
1772 
1773   PetscFunctionBegin;
1774   if (roworiented) {
1775     stepval = (n-1)*bs;
1776   } else {
1777     stepval = (m-1)*bs;
1778   }
1779   for (k=0; k<m; k++) { /* loop over added rows */
1780     row = im[k];
1781     if (row < 0) continue;
1782 #if defined(PETSC_USE_DEBUG)
1783     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1784 #endif
1785     rp   = aj + ai[row];
1786     if (!A->structure_only) ap = aa + bs2*ai[row];
1787     rmax = imax[row];
1788     nrow = ailen[row];
1789     low  = 0;
1790     high = nrow;
1791     for (l=0; l<n; l++) { /* loop over added columns */
1792       if (in[l] < 0) continue;
1793 #if defined(PETSC_USE_DEBUG)
1794       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1795 #endif
1796       col = in[l];
1797       if (!A->structure_only) {
1798         if (roworiented) {
1799           value = v + (k*(stepval+bs) + l)*bs;
1800         } else {
1801           value = v + (l*(stepval+bs) + k)*bs;
1802         }
1803       }
1804       if (col <= lastcol) low = 0;
1805       else high = nrow;
1806       lastcol = col;
1807       while (high-low > 7) {
1808         t = (low+high)/2;
1809         if (rp[t] > col) high = t;
1810         else             low  = t;
1811       }
1812       for (i=low; i<high; i++) {
1813         if (rp[i] > col) break;
1814         if (rp[i] == col) {
1815           if (A->structure_only) goto noinsert2;
1816           bap = ap +  bs2*i;
1817           if (roworiented) {
1818             if (is == ADD_VALUES) {
1819               for (ii=0; ii<bs; ii++,value+=stepval) {
1820                 for (jj=ii; jj<bs2; jj+=bs) {
1821                   bap[jj] += *value++;
1822                 }
1823               }
1824             } else {
1825               for (ii=0; ii<bs; ii++,value+=stepval) {
1826                 for (jj=ii; jj<bs2; jj+=bs) {
1827                   bap[jj] = *value++;
1828                 }
1829               }
1830             }
1831           } else {
1832             if (is == ADD_VALUES) {
1833               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1834                 for (jj=0; jj<bs; jj++) {
1835                   bap[jj] += value[jj];
1836                 }
1837                 bap += bs;
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1841                 for (jj=0; jj<bs; jj++) {
1842                   bap[jj]  = value[jj];
1843                 }
1844                 bap += bs;
1845               }
1846             }
1847           }
1848           goto noinsert2;
1849         }
1850       }
1851       if (nonew == 1) goto noinsert2;
1852       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1853       if (A->structure_only) {
1854         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1855       } else {
1856         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1857       }
1858       N = nrow++ - 1; high++;
1859       /* shift up all the later entries in this row */
1860       for (ii=N; ii>=i; ii--) {
1861         rp[ii+1] = rp[ii];
1862         if (!A->structure_only) {
1863           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1864         }
1865       }
1866       if (N >= i && !A->structure_only) {
1867         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1868       }
1869 
1870       rp[i] = col;
1871       if (!A->structure_only) {
1872         bap   = ap +  bs2*i;
1873         if (roworiented) {
1874           for (ii=0; ii<bs; ii++,value+=stepval) {
1875             for (jj=ii; jj<bs2; jj+=bs) {
1876               bap[jj] = *value++;
1877             }
1878           }
1879         } else {
1880           for (ii=0; ii<bs; ii++,value+=stepval) {
1881             for (jj=0; jj<bs; jj++) {
1882               *bap++ = *value++;
1883             }
1884           }
1885         }
1886       }
1887 noinsert2:;
1888       low = i;
1889     }
1890     ailen[row] = nrow;
1891   }
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1896 {
1897   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1898   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1899   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1900   PetscErrorCode ierr;
1901   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1902   MatScalar      *aa  = a->a,*ap;
1903   PetscReal      ratio=0.6;
1904 
1905   PetscFunctionBegin;
1906   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1907 
1908   if (m) rmax = ailen[0];
1909   for (i=1; i<mbs; i++) {
1910     /* move each row back by the amount of empty slots (fshift) before it*/
1911     fshift += imax[i-1] - ailen[i-1];
1912     rmax    = PetscMax(rmax,ailen[i]);
1913     if (fshift) {
1914       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1915       N  = ailen[i];
1916       for (j=0; j<N; j++) {
1917         ip[j-fshift] = ip[j];
1918         if (!A->structure_only) {
1919           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1920         }
1921       }
1922     }
1923     ai[i] = ai[i-1] + ailen[i-1];
1924   }
1925   if (mbs) {
1926     fshift += imax[mbs-1] - ailen[mbs-1];
1927     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1928   }
1929 
1930   /* reset ilen and imax for each row */
1931   a->nonzerorowcnt = 0;
1932   if (A->structure_only) {
1933     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1934   } else { /* !A->structure_only */
1935     for (i=0; i<mbs; i++) {
1936       ailen[i] = imax[i] = ai[i+1] - ai[i];
1937       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1938     }
1939   }
1940   a->nz = ai[mbs];
1941 
1942   /* diagonals may have moved, so kill the diagonal pointers */
1943   a->idiagvalid = PETSC_FALSE;
1944   if (fshift && a->diag) {
1945     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1946     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1947     a->diag = 0;
1948   }
1949   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1950   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1953 
1954   A->info.mallocs    += a->reallocs;
1955   a->reallocs         = 0;
1956   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1957   a->rmax             = rmax;
1958 
1959   if (!A->structure_only) {
1960     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1961   }
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 /*
1966    This function returns an array of flags which indicate the locations of contiguous
1967    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1968    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1969    Assume: sizes should be long enough to hold all the values.
1970 */
1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1972 {
1973   PetscInt  i,j,k,row;
1974   PetscBool flg;
1975 
1976   PetscFunctionBegin;
1977   for (i=0,j=0; i<n; j++) {
1978     row = idx[i];
1979     if (row%bs!=0) { /* Not the begining of a block */
1980       sizes[j] = 1;
1981       i++;
1982     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1983       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1984       i++;
1985     } else { /* Begining of the block, so check if the complete block exists */
1986       flg = PETSC_TRUE;
1987       for (k=1; k<bs; k++) {
1988         if (row+k != idx[i+k]) { /* break in the block */
1989           flg = PETSC_FALSE;
1990           break;
1991         }
1992       }
1993       if (flg) { /* No break in the bs */
1994         sizes[j] = bs;
1995         i       += bs;
1996       } else {
1997         sizes[j] = 1;
1998         i++;
1999       }
2000     }
2001   }
2002   *bs_max = j;
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2007 {
2008   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2009   PetscErrorCode    ierr;
2010   PetscInt          i,j,k,count,*rows;
2011   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2012   PetscScalar       zero = 0.0;
2013   MatScalar         *aa;
2014   const PetscScalar *xx;
2015   PetscScalar       *bb;
2016 
2017   PetscFunctionBegin;
2018   /* fix right hand side if needed */
2019   if (x && b) {
2020     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2021     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2022     for (i=0; i<is_n; i++) {
2023       bb[is_idx[i]] = diag*xx[is_idx[i]];
2024     }
2025     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2026     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2027   }
2028 
2029   /* Make a copy of the IS and  sort it */
2030   /* allocate memory for rows,sizes */
2031   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2032 
2033   /* copy IS values to rows, and sort them */
2034   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2035   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2036 
2037   if (baij->keepnonzeropattern) {
2038     for (i=0; i<is_n; i++) sizes[i] = 1;
2039     bs_max          = is_n;
2040   } else {
2041     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2042     A->nonzerostate++;
2043   }
2044 
2045   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2046     row = rows[j];
2047     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2048     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2049     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2050     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2051       if (diag != (PetscScalar)0.0) {
2052         if (baij->ilen[row/bs] > 0) {
2053           baij->ilen[row/bs]       = 1;
2054           baij->j[baij->i[row/bs]] = row/bs;
2055 
2056           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2057         }
2058         /* Now insert all the diagonal values for this bs */
2059         for (k=0; k<bs; k++) {
2060           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2061         }
2062       } else { /* (diag == 0.0) */
2063         baij->ilen[row/bs] = 0;
2064       } /* end (diag == 0.0) */
2065     } else { /* (sizes[i] != bs) */
2066 #if defined(PETSC_USE_DEBUG)
2067       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2068 #endif
2069       for (k=0; k<count; k++) {
2070         aa[0] =  zero;
2071         aa   += bs;
2072       }
2073       if (diag != (PetscScalar)0.0) {
2074         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2075       }
2076     }
2077   }
2078 
2079   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2085 {
2086   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2087   PetscErrorCode    ierr;
2088   PetscInt          i,j,k,count;
2089   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2090   PetscScalar       zero = 0.0;
2091   MatScalar         *aa;
2092   const PetscScalar *xx;
2093   PetscScalar       *bb;
2094   PetscBool         *zeroed,vecs = PETSC_FALSE;
2095 
2096   PetscFunctionBegin;
2097   /* fix right hand side if needed */
2098   if (x && b) {
2099     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2100     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2101     vecs = PETSC_TRUE;
2102   }
2103 
2104   /* zero the columns */
2105   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2106   for (i=0; i<is_n; i++) {
2107     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2108     zeroed[is_idx[i]] = PETSC_TRUE;
2109   }
2110   for (i=0; i<A->rmap->N; i++) {
2111     if (!zeroed[i]) {
2112       row = i/bs;
2113       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2114         for (k=0; k<bs; k++) {
2115           col = bs*baij->j[j] + k;
2116           if (zeroed[col]) {
2117             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2118             if (vecs) bb[i] -= aa[0]*xx[col];
2119             aa[0] = 0.0;
2120           }
2121         }
2122       }
2123     } else if (vecs) bb[i] = diag*xx[i];
2124   }
2125   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2126   if (vecs) {
2127     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2128     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2129   }
2130 
2131   /* zero the rows */
2132   for (i=0; i<is_n; i++) {
2133     row   = is_idx[i];
2134     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2135     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2136     for (k=0; k<count; k++) {
2137       aa[0] =  zero;
2138       aa   += bs;
2139     }
2140     if (diag != (PetscScalar)0.0) {
2141       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2142     }
2143   }
2144   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2149 {
2150   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2151   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2152   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2153   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2154   PetscErrorCode ierr;
2155   PetscInt       ridx,cidx,bs2=a->bs2;
2156   PetscBool      roworiented=a->roworiented;
2157   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2158 
2159   PetscFunctionBegin;
2160   for (k=0; k<m; k++) { /* loop over added rows */
2161     row  = im[k];
2162     brow = row/bs;
2163     if (row < 0) continue;
2164 #if defined(PETSC_USE_DEBUG)
2165     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2166 #endif
2167     rp   = aj + ai[brow];
2168     if (!A->structure_only) ap = aa + bs2*ai[brow];
2169     rmax = imax[brow];
2170     nrow = ailen[brow];
2171     low  = 0;
2172     high = nrow;
2173     for (l=0; l<n; l++) { /* loop over added columns */
2174       if (in[l] < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2177 #endif
2178       col  = in[l]; bcol = col/bs;
2179       ridx = row % bs; cidx = col % bs;
2180       if (!A->structure_only) {
2181         if (roworiented) {
2182           value = v[l + k*n];
2183         } else {
2184           value = v[k + l*m];
2185         }
2186       }
2187       if (col <= lastcol) low = 0; else high = nrow;
2188       lastcol = col;
2189       while (high-low > 7) {
2190         t = (low+high)/2;
2191         if (rp[t] > bcol) high = t;
2192         else              low  = t;
2193       }
2194       for (i=low; i<high; i++) {
2195         if (rp[i] > bcol) break;
2196         if (rp[i] == bcol) {
2197           bap = ap +  bs2*i + bs*cidx + ridx;
2198           if (!A->structure_only) {
2199             if (is == ADD_VALUES) *bap += value;
2200             else                  *bap  = value;
2201           }
2202           goto noinsert1;
2203         }
2204       }
2205       if (nonew == 1) goto noinsert1;
2206       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2207       if (A->structure_only) {
2208         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2209       } else {
2210         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2211       }
2212       N = nrow++ - 1; high++;
2213       /* shift up all the later entries in this row */
2214       for (ii=N; ii>=i; ii--) {
2215         rp[ii+1] = rp[ii];
2216         if (!A->structure_only) {
2217           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2218         }
2219       }
2220       if (N>=i && !A->structure_only) {
2221         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2222       }
2223       rp[i]                      = bcol;
2224       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2225       a->nz++;
2226       A->nonzerostate++;
2227 noinsert1:;
2228       low = i;
2229     }
2230     ailen[brow] = nrow;
2231   }
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2236 {
2237   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2238   Mat            outA;
2239   PetscErrorCode ierr;
2240   PetscBool      row_identity,col_identity;
2241 
2242   PetscFunctionBegin;
2243   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2244   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2245   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2246   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2247 
2248   outA            = inA;
2249   inA->factortype = MAT_FACTOR_LU;
2250   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2251   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2252 
2253   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2254 
2255   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2256   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2257   a->row = row;
2258   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2259   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2260   a->col = col;
2261 
2262   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2263   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2264   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2265   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2266 
2267   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2268   if (!a->solve_work) {
2269     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2270     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2271   }
2272   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2277 {
2278   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2279   PetscInt    i,nz,mbs;
2280 
2281   PetscFunctionBegin;
2282   nz  = baij->maxnz;
2283   mbs = baij->mbs;
2284   for (i=0; i<nz; i++) {
2285     baij->j[i] = indices[i];
2286   }
2287   baij->nz = nz;
2288   for (i=0; i<mbs; i++) {
2289     baij->ilen[i] = baij->imax[i];
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*@
2295     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2296        in the matrix.
2297 
2298   Input Parameters:
2299 +  mat - the SeqBAIJ matrix
2300 -  indices - the column indices
2301 
2302   Level: advanced
2303 
2304   Notes:
2305     This can be called if you have precomputed the nonzero structure of the
2306   matrix and want to provide it to the matrix object to improve the performance
2307   of the MatSetValues() operation.
2308 
2309     You MUST have set the correct numbers of nonzeros per row in the call to
2310   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2311 
2312     MUST be called before any calls to MatSetValues();
2313 
2314 @*/
2315 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2316 {
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2321   PetscValidPointer(indices,2);
2322   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2327 {
2328   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2331   PetscReal      atmp;
2332   PetscScalar    *x,zero = 0.0;
2333   MatScalar      *aa;
2334   PetscInt       ncols,brow,krow,kcol;
2335 
2336   PetscFunctionBegin;
2337   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2338   bs  = A->rmap->bs;
2339   aa  = a->a;
2340   ai  = a->i;
2341   aj  = a->j;
2342   mbs = a->mbs;
2343 
2344   ierr = VecSet(v,zero);CHKERRQ(ierr);
2345   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2346   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2347   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2348   for (i=0; i<mbs; i++) {
2349     ncols = ai[1] - ai[0]; ai++;
2350     brow  = bs*i;
2351     for (j=0; j<ncols; j++) {
2352       for (kcol=0; kcol<bs; kcol++) {
2353         for (krow=0; krow<bs; krow++) {
2354           atmp = PetscAbsScalar(*aa);aa++;
2355           row  = brow + krow;   /* row index */
2356           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2357         }
2358       }
2359       aj++;
2360     }
2361   }
2362   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2367 {
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   /* If the two matrices have the same copy implementation, use fast copy. */
2372   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2373     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2374     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2375     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2376 
2377     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2378     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2379     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2380     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2381   } else {
2382     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2388 {
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2397 {
2398   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2399 
2400   PetscFunctionBegin;
2401   *array = a->a;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2406 {
2407   PetscFunctionBegin;
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2412 {
2413   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2414   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2415   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2416   PetscErrorCode ierr;
2417 
2418   PetscFunctionBegin;
2419   /* Set the number of nonzeros in the new matrix */
2420   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2425 {
2426   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2427   PetscErrorCode ierr;
2428   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2429   PetscBLASInt   one=1;
2430 
2431   PetscFunctionBegin;
2432   if (str == SAME_NONZERO_PATTERN) {
2433     PetscScalar  alpha = a;
2434     PetscBLASInt bnz;
2435     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2436     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2437     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2438   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2439     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2440   } else {
2441     Mat      B;
2442     PetscInt *nnz;
2443     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2444     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2445     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2446     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2447     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2448     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2449     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2450     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2451     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2452     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2453     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2454     ierr = PetscFree(nnz);CHKERRQ(ierr);
2455   }
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2460 {
2461   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2462   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2463   MatScalar   *aa = a->a;
2464 
2465   PetscFunctionBegin;
2466   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2471 {
2472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474   MatScalar   *aa = a->a;
2475 
2476   PetscFunctionBegin;
2477   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 /*
2482     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2483 */
2484 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2485 {
2486   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2487   PetscErrorCode ierr;
2488   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2489   PetscInt       nz = a->i[m],row,*jj,mr,col;
2490 
2491   PetscFunctionBegin;
2492   *nn = n;
2493   if (!ia) PetscFunctionReturn(0);
2494   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2495   else {
2496     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2497     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2498     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2499     jj   = a->j;
2500     for (i=0; i<nz; i++) {
2501       collengths[jj[i]]++;
2502     }
2503     cia[0] = oshift;
2504     for (i=0; i<n; i++) {
2505       cia[i+1] = cia[i] + collengths[i];
2506     }
2507     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2508     jj   = a->j;
2509     for (row=0; row<m; row++) {
2510       mr = a->i[row+1] - a->i[row];
2511       for (i=0; i<mr; i++) {
2512         col = *jj++;
2513 
2514         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2515       }
2516     }
2517     ierr = PetscFree(collengths);CHKERRQ(ierr);
2518     *ia  = cia; *ja = cja;
2519   }
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2524 {
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   if (!ia) PetscFunctionReturn(0);
2529   ierr = PetscFree(*ia);CHKERRQ(ierr);
2530   ierr = PetscFree(*ja);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 /*
2535  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2536  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2537  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2538  */
2539 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2540 {
2541   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2542   PetscErrorCode ierr;
2543   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2544   PetscInt       nz = a->i[m],row,*jj,mr,col;
2545   PetscInt       *cspidx;
2546 
2547   PetscFunctionBegin;
2548   *nn = n;
2549   if (!ia) PetscFunctionReturn(0);
2550 
2551   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2552   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2553   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2554   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2555   jj   = a->j;
2556   for (i=0; i<nz; i++) {
2557     collengths[jj[i]]++;
2558   }
2559   cia[0] = oshift;
2560   for (i=0; i<n; i++) {
2561     cia[i+1] = cia[i] + collengths[i];
2562   }
2563   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2564   jj   = a->j;
2565   for (row=0; row<m; row++) {
2566     mr = a->i[row+1] - a->i[row];
2567     for (i=0; i<mr; i++) {
2568       col = *jj++;
2569       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2570       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2571     }
2572   }
2573   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2574   *ia    = cia; *ja = cja;
2575   *spidx = cspidx;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2580 {
2581   PetscErrorCode ierr;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2585   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2590 {
2591   PetscErrorCode ierr;
2592   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2593 
2594   PetscFunctionBegin;
2595   if (!Y->preallocated || !aij->nz) {
2596     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2597   }
2598   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 /* -------------------------------------------------------------------*/
2603 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2604                                        MatGetRow_SeqBAIJ,
2605                                        MatRestoreRow_SeqBAIJ,
2606                                        MatMult_SeqBAIJ_N,
2607                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2608                                        MatMultTranspose_SeqBAIJ,
2609                                        MatMultTransposeAdd_SeqBAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                /* 10*/ 0,
2614                                        MatLUFactor_SeqBAIJ,
2615                                        0,
2616                                        0,
2617                                        MatTranspose_SeqBAIJ,
2618                                /* 15*/ MatGetInfo_SeqBAIJ,
2619                                        MatEqual_SeqBAIJ,
2620                                        MatGetDiagonal_SeqBAIJ,
2621                                        MatDiagonalScale_SeqBAIJ,
2622                                        MatNorm_SeqBAIJ,
2623                                /* 20*/ 0,
2624                                        MatAssemblyEnd_SeqBAIJ,
2625                                        MatSetOption_SeqBAIJ,
2626                                        MatZeroEntries_SeqBAIJ,
2627                                /* 24*/ MatZeroRows_SeqBAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                /* 29*/ MatSetUp_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                /* 34*/ MatDuplicate_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        MatILUFactor_SeqBAIJ,
2641                                        0,
2642                                /* 39*/ MatAXPY_SeqBAIJ,
2643                                        MatCreateSubMatrices_SeqBAIJ,
2644                                        MatIncreaseOverlap_SeqBAIJ,
2645                                        MatGetValues_SeqBAIJ,
2646                                        MatCopy_SeqBAIJ,
2647                                /* 44*/ 0,
2648                                        MatScale_SeqBAIJ,
2649                                        MatShift_SeqBAIJ,
2650                                        0,
2651                                        MatZeroRowsColumns_SeqBAIJ,
2652                                /* 49*/ 0,
2653                                        MatGetRowIJ_SeqBAIJ,
2654                                        MatRestoreRowIJ_SeqBAIJ,
2655                                        MatGetColumnIJ_SeqBAIJ,
2656                                        MatRestoreColumnIJ_SeqBAIJ,
2657                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        MatSetValuesBlocked_SeqBAIJ,
2662                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2663                                        MatDestroy_SeqBAIJ,
2664                                        MatView_SeqBAIJ,
2665                                        0,
2666                                        0,
2667                                /* 64*/ 0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2673                                        0,
2674                                        MatConvert_Basic,
2675                                        0,
2676                                        0,
2677                                /* 74*/ 0,
2678                                        MatFDColoringApply_BAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                /* 79*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        MatLoad_SeqBAIJ,
2687                                /* 84*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 89*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 94*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 99*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /*104*/ 0,
2708                                        MatRealPart_SeqBAIJ,
2709                                        MatImaginaryPart_SeqBAIJ,
2710                                        0,
2711                                        0,
2712                                /*109*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        MatMissingDiagonal_SeqBAIJ,
2717                                /*114*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /*119*/ 0,
2723                                        0,
2724                                        MatMultHermitianTranspose_SeqBAIJ,
2725                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2726                                        0,
2727                                /*124*/ 0,
2728                                        0,
2729                                        MatInvertBlockDiagonal_SeqBAIJ,
2730                                        0,
2731                                        0,
2732                                /*129*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*134*/ 0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                /*139*/ MatSetBlockSizes_Default,
2743                                        0,
2744                                        0,
2745                                        MatFDColoringSetUp_SeqXAIJ,
2746                                        0,
2747                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2748                                        MatDestroySubMatrices_SeqBAIJ
2749 };
2750 
2751 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2752 {
2753   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2754   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2759 
2760   /* allocate space for values if not already there */
2761   if (!aij->saved_values) {
2762     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2763     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2764   }
2765 
2766   /* copy values over */
2767   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2772 {
2773   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2774   PetscErrorCode ierr;
2775   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2776 
2777   PetscFunctionBegin;
2778   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2779   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2780 
2781   /* copy values over */
2782   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2787 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2788 
2789 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2790 {
2791   Mat_SeqBAIJ    *b;
2792   PetscErrorCode ierr;
2793   PetscInt       i,mbs,nbs,bs2;
2794   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2795 
2796   PetscFunctionBegin;
2797   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2798   if (nz == MAT_SKIP_ALLOCATION) {
2799     skipallocation = PETSC_TRUE;
2800     nz             = 0;
2801   }
2802 
2803   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2807 
2808   B->preallocated = PETSC_TRUE;
2809 
2810   mbs = B->rmap->n/bs;
2811   nbs = B->cmap->n/bs;
2812   bs2 = bs*bs;
2813 
2814   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2815 
2816   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2817   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2818   if (nnz) {
2819     for (i=0; i<mbs; i++) {
2820       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2821       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2822     }
2823   }
2824 
2825   b    = (Mat_SeqBAIJ*)B->data;
2826   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2827   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2828   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2829 
2830   if (!flg) {
2831     switch (bs) {
2832     case 1:
2833       B->ops->mult    = MatMult_SeqBAIJ_1;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2835       break;
2836     case 2:
2837       B->ops->mult    = MatMult_SeqBAIJ_2;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2839       break;
2840     case 3:
2841       B->ops->mult    = MatMult_SeqBAIJ_3;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2843       break;
2844     case 4:
2845       B->ops->mult    = MatMult_SeqBAIJ_4;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2847       break;
2848     case 5:
2849       B->ops->mult    = MatMult_SeqBAIJ_5;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2851       break;
2852     case 6:
2853       B->ops->mult    = MatMult_SeqBAIJ_6;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2855       break;
2856     case 7:
2857       B->ops->mult    = MatMult_SeqBAIJ_7;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2859       break;
2860     case 9:
2861 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2862       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2864 #else
2865       B->ops->mult    = MatMult_SeqBAIJ_N;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2867 #endif
2868       break;
2869     case 11:
2870       B->ops->mult    = MatMult_SeqBAIJ_11;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2872       break;
2873     case 15:
2874       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     default:
2878       B->ops->mult    = MatMult_SeqBAIJ_N;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2880       break;
2881     }
2882   }
2883   B->ops->sor = MatSOR_SeqBAIJ;
2884   b->mbs = mbs;
2885   b->nbs = nbs;
2886   if (!skipallocation) {
2887     if (!b->imax) {
2888       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2889       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2890 
2891       b->free_imax_ilen = PETSC_TRUE;
2892     }
2893     /* b->ilen will count nonzeros in each block row so far. */
2894     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2895     if (!nnz) {
2896       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2897       else if (nz < 0) nz = 1;
2898       nz = PetscMin(nz,nbs);
2899       for (i=0; i<mbs; i++) b->imax[i] = nz;
2900       nz = nz*mbs;
2901     } else {
2902       nz = 0;
2903       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2904     }
2905 
2906     /* allocate the matrix space */
2907     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2908     if (B->structure_only) {
2909       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2910       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2911       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2912     } else {
2913       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2914       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2915       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2916     }
2917     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2918 
2919     if (B->structure_only) {
2920       b->singlemalloc = PETSC_FALSE;
2921       b->free_a       = PETSC_FALSE;
2922     } else {
2923       b->singlemalloc = PETSC_TRUE;
2924       b->free_a       = PETSC_TRUE;
2925     }
2926     b->free_ij = PETSC_TRUE;
2927 
2928     b->i[0] = 0;
2929     for (i=1; i<mbs+1; i++) {
2930       b->i[i] = b->i[i-1] + b->imax[i-1];
2931     }
2932 
2933   } else {
2934     b->free_a  = PETSC_FALSE;
2935     b->free_ij = PETSC_FALSE;
2936   }
2937 
2938   b->bs2              = bs2;
2939   b->mbs              = mbs;
2940   b->nz               = 0;
2941   b->maxnz            = nz;
2942   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2943   B->was_assembled    = PETSC_FALSE;
2944   B->assembled        = PETSC_FALSE;
2945   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2950 {
2951   PetscInt       i,m,nz,nz_max=0,*nnz;
2952   PetscScalar    *values=0;
2953   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2958   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2961   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2962   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2963   m    = B->rmap->n/bs;
2964 
2965   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2966   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2967   for (i=0; i<m; i++) {
2968     nz = ii[i+1]- ii[i];
2969     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2970     nz_max = PetscMax(nz_max, nz);
2971     nnz[i] = nz;
2972   }
2973   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2974   ierr = PetscFree(nnz);CHKERRQ(ierr);
2975 
2976   values = (PetscScalar*)V;
2977   if (!values) {
2978     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2979   }
2980   for (i=0; i<m; i++) {
2981     PetscInt          ncols  = ii[i+1] - ii[i];
2982     const PetscInt    *icols = jj + ii[i];
2983     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2984     if (!roworiented) {
2985       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2986     } else {
2987       PetscInt j;
2988       for (j=0; j<ncols; j++) {
2989         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2990         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2991       }
2992     }
2993   }
2994   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 /*MC
3002    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3003    block sparse compressed row format.
3004 
3005    Options Database Keys:
3006 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3007 
3008   Level: beginner
3009 
3010 .seealso: MatCreateSeqBAIJ()
3011 M*/
3012 
3013 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3014 
3015 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3016 {
3017   PetscErrorCode ierr;
3018   PetscMPIInt    size;
3019   Mat_SeqBAIJ    *b;
3020 
3021   PetscFunctionBegin;
3022   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3023   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3024 
3025   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3026   B->data = (void*)b;
3027   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3028 
3029   b->row          = 0;
3030   b->col          = 0;
3031   b->icol         = 0;
3032   b->reallocs     = 0;
3033   b->saved_values = 0;
3034 
3035   b->roworiented        = PETSC_TRUE;
3036   b->nonew              = 0;
3037   b->diag               = 0;
3038   B->spptr              = 0;
3039   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3040   b->keepnonzeropattern = PETSC_FALSE;
3041 
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3051 #if defined(PETSC_HAVE_HYPRE)
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3053 #endif
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3061 {
3062   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3063   PetscErrorCode ierr;
3064   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3065 
3066   PetscFunctionBegin;
3067   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3068 
3069   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3070     c->imax           = a->imax;
3071     c->ilen           = a->ilen;
3072     c->free_imax_ilen = PETSC_FALSE;
3073   } else {
3074     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3075     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3076     for (i=0; i<mbs; i++) {
3077       c->imax[i] = a->imax[i];
3078       c->ilen[i] = a->ilen[i];
3079     }
3080     c->free_imax_ilen = PETSC_TRUE;
3081   }
3082 
3083   /* allocate the matrix space */
3084   if (mallocmatspace) {
3085     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3086       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3087       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3088 
3089       c->i            = a->i;
3090       c->j            = a->j;
3091       c->singlemalloc = PETSC_FALSE;
3092       c->free_a       = PETSC_TRUE;
3093       c->free_ij      = PETSC_FALSE;
3094       c->parent       = A;
3095       C->preallocated = PETSC_TRUE;
3096       C->assembled    = PETSC_TRUE;
3097 
3098       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3099       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3100       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3101     } else {
3102       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3103       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3104 
3105       c->singlemalloc = PETSC_TRUE;
3106       c->free_a       = PETSC_TRUE;
3107       c->free_ij      = PETSC_TRUE;
3108 
3109       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3110       if (mbs > 0) {
3111         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3112         if (cpvalues == MAT_COPY_VALUES) {
3113           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3114         } else {
3115           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3116         }
3117       }
3118       C->preallocated = PETSC_TRUE;
3119       C->assembled    = PETSC_TRUE;
3120     }
3121   }
3122 
3123   c->roworiented = a->roworiented;
3124   c->nonew       = a->nonew;
3125 
3126   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3127   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3128 
3129   c->bs2         = a->bs2;
3130   c->mbs         = a->mbs;
3131   c->nbs         = a->nbs;
3132 
3133   if (a->diag) {
3134     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3135       c->diag      = a->diag;
3136       c->free_diag = PETSC_FALSE;
3137     } else {
3138       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3139       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3140       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3141       c->free_diag = PETSC_TRUE;
3142     }
3143   } else c->diag = 0;
3144 
3145   c->nz         = a->nz;
3146   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3147   c->solve_work = NULL;
3148   c->mult_work  = NULL;
3149   c->sor_workt  = NULL;
3150   c->sor_work   = NULL;
3151 
3152   c->compressedrow.use   = a->compressedrow.use;
3153   c->compressedrow.nrows = a->compressedrow.nrows;
3154   if (a->compressedrow.use) {
3155     i    = a->compressedrow.nrows;
3156     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3157     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3158     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3159     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3160   } else {
3161     c->compressedrow.use    = PETSC_FALSE;
3162     c->compressedrow.i      = NULL;
3163     c->compressedrow.rindex = NULL;
3164   }
3165   C->nonzerostate = A->nonzerostate;
3166 
3167   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3172 {
3173   PetscErrorCode ierr;
3174 
3175   PetscFunctionBegin;
3176   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3177   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3178   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3179   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3184 {
3185   Mat_SeqBAIJ    *a;
3186   PetscErrorCode ierr;
3187   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3188   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3189   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3190   PetscInt       *masked,nmask,tmp,bs2,ishift;
3191   PetscMPIInt    size;
3192   int            fd;
3193   PetscScalar    *aa;
3194   MPI_Comm       comm;
3195   PetscBool      isbinary;
3196 
3197   PetscFunctionBegin;
3198   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3199   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3200 
3201   /* force binary viewer to load .info file if it has not yet done so */
3202   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3203   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3204   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3205   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3206   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3207   if (bs < 0) bs = 1;
3208   bs2  = bs*bs;
3209 
3210   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3211   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3213   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3214   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3215   M = header[1]; N = header[2]; nz = header[3];
3216 
3217   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3218   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3219 
3220   /*
3221      This code adds extra rows to make sure the number of rows is
3222     divisible by the blocksize
3223   */
3224   mbs        = M/bs;
3225   extra_rows = bs - M + bs*(mbs);
3226   if (extra_rows == bs) extra_rows = 0;
3227   else mbs++;
3228   if (extra_rows) {
3229     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3230   }
3231 
3232   /* Set global sizes if not already set */
3233   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3234     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3235   } else { /* Check if the matrix global sizes are correct */
3236     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3237     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3238       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3239     }
3240     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3241   }
3242 
3243   /* read in row lengths */
3244   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3245   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3246   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3247 
3248   /* read in column indices */
3249   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3250   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3251   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3252 
3253   /* loop over row lengths determining block row lengths */
3254   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3255   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3256   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3257   rowcount = 0;
3258   nzcount  = 0;
3259   for (i=0; i<mbs; i++) {
3260     nmask = 0;
3261     for (j=0; j<bs; j++) {
3262       kmax = rowlengths[rowcount];
3263       for (k=0; k<kmax; k++) {
3264         tmp = jj[nzcount++]/bs;
3265         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3266       }
3267       rowcount++;
3268     }
3269     browlengths[i] += nmask;
3270     /* zero out the mask elements we set */
3271     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3272   }
3273 
3274   /* Do preallocation  */
3275   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3276   a    = (Mat_SeqBAIJ*)newmat->data;
3277 
3278   /* set matrix "i" values */
3279   a->i[0] = 0;
3280   for (i=1; i<= mbs; i++) {
3281     a->i[i]      = a->i[i-1] + browlengths[i-1];
3282     a->ilen[i-1] = browlengths[i-1];
3283   }
3284   a->nz = 0;
3285   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3286 
3287   /* read in nonzero values */
3288   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3289   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3290   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3291 
3292   /* set "a" and "j" values into matrix */
3293   nzcount = 0; jcount = 0;
3294   for (i=0; i<mbs; i++) {
3295     nzcountb = nzcount;
3296     nmask    = 0;
3297     for (j=0; j<bs; j++) {
3298       kmax = rowlengths[i*bs+j];
3299       for (k=0; k<kmax; k++) {
3300         tmp = jj[nzcount++]/bs;
3301         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3302       }
3303     }
3304     /* sort the masked values */
3305     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3306 
3307     /* set "j" values into matrix */
3308     maskcount = 1;
3309     for (j=0; j<nmask; j++) {
3310       a->j[jcount++]  = masked[j];
3311       mask[masked[j]] = maskcount++;
3312     }
3313     /* set "a" values into matrix */
3314     ishift = bs2*a->i[i];
3315     for (j=0; j<bs; j++) {
3316       kmax = rowlengths[i*bs+j];
3317       for (k=0; k<kmax; k++) {
3318         tmp       = jj[nzcountb]/bs;
3319         block     = mask[tmp] - 1;
3320         point     = jj[nzcountb] - bs*tmp;
3321         idx       = ishift + bs2*block + j + bs*point;
3322         a->a[idx] = (MatScalar)aa[nzcountb++];
3323       }
3324     }
3325     /* zero out the mask elements we set */
3326     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3327   }
3328   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3329 
3330   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3331   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3332   ierr = PetscFree(aa);CHKERRQ(ierr);
3333   ierr = PetscFree(jj);CHKERRQ(ierr);
3334   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3335 
3336   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 /*@C
3342    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3343    compressed row) format.  For good matrix assembly performance the
3344    user should preallocate the matrix storage by setting the parameter nz
3345    (or the array nnz).  By setting these parameters accurately, performance
3346    during matrix assembly can be increased by more than a factor of 50.
3347 
3348    Collective on MPI_Comm
3349 
3350    Input Parameters:
3351 +  comm - MPI communicator, set to PETSC_COMM_SELF
3352 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3353           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3354 .  m - number of rows
3355 .  n - number of columns
3356 .  nz - number of nonzero blocks  per block row (same for all rows)
3357 -  nnz - array containing the number of nonzero blocks in the various block rows
3358          (possibly different for each block row) or NULL
3359 
3360    Output Parameter:
3361 .  A - the matrix
3362 
3363    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3364    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3365    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3366 
3367    Options Database Keys:
3368 .   -mat_no_unroll - uses code that does not unroll the loops in the
3369                      block calculations (much slower)
3370 .    -mat_block_size - size of the blocks to use
3371 
3372    Level: intermediate
3373 
3374    Notes:
3375    The number of rows and columns must be divisible by blocksize.
3376 
3377    If the nnz parameter is given then the nz parameter is ignored
3378 
3379    A nonzero block is any block that as 1 or more nonzeros in it
3380 
3381    The block AIJ format is fully compatible with standard Fortran 77
3382    storage.  That is, the stored row and column indices can begin at
3383    either one (as in Fortran) or zero.  See the users' manual for details.
3384 
3385    Specify the preallocated storage with either nz or nnz (not both).
3386    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3387    allocation.  See Users-Manual: ch_mat for details.
3388    matrices.
3389 
3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3391 @*/
3392 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3393 {
3394   PetscErrorCode ierr;
3395 
3396   PetscFunctionBegin;
3397   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3398   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3399   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3400   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 /*@C
3405    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3406    per row in the matrix. For good matrix assembly performance the
3407    user should preallocate the matrix storage by setting the parameter nz
3408    (or the array nnz).  By setting these parameters accurately, performance
3409    during matrix assembly can be increased by more than a factor of 50.
3410 
3411    Collective on MPI_Comm
3412 
3413    Input Parameters:
3414 +  B - the matrix
3415 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3416           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3417 .  nz - number of block nonzeros per block row (same for all rows)
3418 -  nnz - array containing the number of block nonzeros in the various block rows
3419          (possibly different for each block row) or NULL
3420 
3421    Options Database Keys:
3422 .   -mat_no_unroll - uses code that does not unroll the loops in the
3423                      block calculations (much slower)
3424 .   -mat_block_size - size of the blocks to use
3425 
3426    Level: intermediate
3427 
3428    Notes:
3429    If the nnz parameter is given then the nz parameter is ignored
3430 
3431    You can call MatGetInfo() to get information on how effective the preallocation was;
3432    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3433    You can also run with the option -info and look for messages with the string
3434    malloc in them to see if additional memory allocation was needed.
3435 
3436    The block AIJ format is fully compatible with standard Fortran 77
3437    storage.  That is, the stored row and column indices can begin at
3438    either one (as in Fortran) or zero.  See the users' manual for details.
3439 
3440    Specify the preallocated storage with either nz or nnz (not both).
3441    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3442    allocation.  See Users-Manual: ch_mat for details.
3443 
3444 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3445 @*/
3446 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3447 {
3448   PetscErrorCode ierr;
3449 
3450   PetscFunctionBegin;
3451   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3452   PetscValidType(B,1);
3453   PetscValidLogicalCollectiveInt(B,bs,2);
3454   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3455   PetscFunctionReturn(0);
3456 }
3457 
3458 /*@C
3459    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3460    (the default sequential PETSc format).
3461 
3462    Collective on MPI_Comm
3463 
3464    Input Parameters:
3465 +  B - the matrix
3466 .  i - the indices into j for the start of each local row (starts with zero)
3467 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3468 -  v - optional values in the matrix
3469 
3470    Level: developer
3471 
3472    Notes:
3473    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3474    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3475    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3476    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3477    block column and the second index is over columns within a block.
3478 
3479 .keywords: matrix, aij, compressed row, sparse
3480 
3481 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3482 @*/
3483 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3484 {
3485   PetscErrorCode ierr;
3486 
3487   PetscFunctionBegin;
3488   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3489   PetscValidType(B,1);
3490   PetscValidLogicalCollectiveInt(B,bs,2);
3491   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 
3496 /*@
3497      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3498 
3499      Collective on MPI_Comm
3500 
3501    Input Parameters:
3502 +  comm - must be an MPI communicator of size 1
3503 .  bs - size of block
3504 .  m - number of rows
3505 .  n - number of columns
3506 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3507 .  j - column indices
3508 -  a - matrix values
3509 
3510    Output Parameter:
3511 .  mat - the matrix
3512 
3513    Level: advanced
3514 
3515    Notes:
3516        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3517     once the matrix is destroyed
3518 
3519        You cannot set new nonzero locations into this matrix, that will generate an error.
3520 
3521        The i and j indices are 0 based
3522 
3523        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3524 
3525       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3526       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3527       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3528       with column-major ordering within blocks.
3529 
3530 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3531 
3532 @*/
3533 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3534 {
3535   PetscErrorCode ierr;
3536   PetscInt       ii;
3537   Mat_SeqBAIJ    *baij;
3538 
3539   PetscFunctionBegin;
3540   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3541   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3542 
3543   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3544   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3545   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3546   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3547   baij = (Mat_SeqBAIJ*)(*mat)->data;
3548   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3549   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3550 
3551   baij->i = i;
3552   baij->j = j;
3553   baij->a = a;
3554 
3555   baij->singlemalloc = PETSC_FALSE;
3556   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3557   baij->free_a       = PETSC_FALSE;
3558   baij->free_ij      = PETSC_FALSE;
3559 
3560   for (ii=0; ii<m; ii++) {
3561     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3562 #if defined(PETSC_USE_DEBUG)
3563     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3564 #endif
3565   }
3566 #if defined(PETSC_USE_DEBUG)
3567   for (ii=0; ii<baij->i[m]; ii++) {
3568     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3569     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3570   }
3571 #endif
3572 
3573   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3574   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3579 {
3580   PetscErrorCode ierr;
3581   PetscMPIInt    size;
3582 
3583   PetscFunctionBegin;
3584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3585   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3586     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3587   } else {
3588     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3589   }
3590   PetscFunctionReturn(0);
3591 }
3592