xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b51a437600744b27e674134f831f57ac6dc85c95)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     PetscInt n;
1051     n = PetscMin(a->mbs, a->nbs);
1052     diag = a->diag;
1053     for (i=0; i<n; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_HYPRE)
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235 {
1236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_ROW_ORIENTED:
1242     a->roworiented = flg;
1243     break;
1244   case MAT_KEEP_NONZERO_PATTERN:
1245     a->keepnonzeropattern = flg;
1246     break;
1247   case MAT_NEW_NONZERO_LOCATIONS:
1248     a->nonew = (flg ? 0 : 1);
1249     break;
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     a->nonew = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254     a->nonew = (flg ? -2 : 0);
1255     break;
1256   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257     a->nounused = (flg ? -1 : 0);
1258     break;
1259   case MAT_NEW_DIAGONALS:
1260   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261   case MAT_USE_HASH_TABLE:
1262   case MAT_SORTED_FULL:
1263     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1264     break;
1265   case MAT_SPD:
1266   case MAT_SYMMETRIC:
1267   case MAT_STRUCTURALLY_SYMMETRIC:
1268   case MAT_HERMITIAN:
1269   case MAT_SYMMETRY_ETERNAL:
1270   case MAT_SUBMAT_SINGLEIS:
1271   case MAT_STRUCTURE_ONLY:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1282 {
1283   PetscErrorCode ierr;
1284   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1285   MatScalar      *aa_i;
1286   PetscScalar    *v_i;
1287 
1288   PetscFunctionBegin;
1289   bs  = A->rmap->bs;
1290   bs2 = bs*bs;
1291   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1292 
1293   bn  = row/bs;   /* Block number */
1294   bp  = row % bs; /* Block Position */
1295   M   = ai[bn+1] - ai[bn];
1296   *nz = bs*M;
1297 
1298   if (v) {
1299     *v = 0;
1300     if (*nz) {
1301       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1302       for (i=0; i<M; i++) { /* for each block in the block row */
1303         v_i  = *v + i*bs;
1304         aa_i = aa + bs2*(ai[bn] + i);
1305         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1306       }
1307     }
1308   }
1309 
1310   if (idx) {
1311     *idx = 0;
1312     if (*nz) {
1313       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1314       for (i=0; i<M; i++) { /* for each block in the block row */
1315         idx_i = *idx + i*bs;
1316         itmp  = bs*aj[ai[bn] + i];
1317         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1318       }
1319     }
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335 {
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1340   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1345 {
1346   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1347   Mat            C;
1348   PetscErrorCode ierr;
1349   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1350   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1351   MatScalar      *ata,*aa=a->a;
1352 
1353   PetscFunctionBegin;
1354   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1355   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1356     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1357 
1358     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1359     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1360     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1361     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1362 
1363     at  = (Mat_SeqBAIJ*)C->data;
1364     ati = at->i;
1365     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1366   } else {
1367     C = *B;
1368     at = (Mat_SeqBAIJ*)C->data;
1369     ati = at->i;
1370   }
1371 
1372   atj = at->j;
1373   ata = at->a;
1374 
1375   /* Copy ati into atfill so we have locations of the next free space in atj */
1376   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1377 
1378   /* Walk through A row-wise and mark nonzero entries of A^T. */
1379   for (i=0; i<mbs; i++) {
1380     anzj = ai[i+1] - ai[i];
1381     for (j=0; j<anzj; j++) {
1382       atj[atfill[*aj]] = i;
1383       for (kr=0; kr<bs; kr++) {
1384         for (k=0; k<bs; k++) {
1385           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1386         }
1387       }
1388       atfill[*aj++] += 1;
1389     }
1390   }
1391   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393 
1394   /* Clean up temporary space and complete requests. */
1395   ierr = PetscFree(atfill);CHKERRQ(ierr);
1396 
1397   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1398     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1399     *B = C;
1400   } else {
1401     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1420 {
1421   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1422   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1423   PetscInt       *rowlens,*colidxs;
1424   PetscScalar    *matvals;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1429 
1430   M  = mat->rmap->N;
1431   N  = mat->cmap->N;
1432   m  = mat->rmap->n;
1433   bs = mat->rmap->bs;
1434   nz = bs*bs*A->nz;
1435 
1436   /* write matrix header */
1437   header[0] = MAT_FILE_CLASSID;
1438   header[1] = M; header[2] = N; header[3] = nz;
1439   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1440 
1441   /* store row lengths */
1442   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1443   for (cnt=0, i=0; i<A->mbs; i++)
1444     for (j=0; j<bs; j++)
1445       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1446   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1447   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1448 
1449   /* store column indices  */
1450   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1451   for (cnt=0, i=0; i<A->mbs; i++)
1452     for (k=0; k<bs; k++)
1453       for (j=A->i[i]; j<A->i[i+1]; j++)
1454         for (l=0; l<bs; l++)
1455           colidxs[cnt++] = bs*A->j[j] + l;
1456   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1457   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1458   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1459 
1460   /* store nonzero values */
1461   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1462   for (cnt=0, i=0; i<A->mbs; i++)
1463     for (k=0; k<bs; k++)
1464       for (j=A->i[i]; j<A->i[i+1]; j++)
1465         for (l=0; l<bs; l++)
1466           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1467   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1468   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1469   ierr = PetscFree(matvals);CHKERRQ(ierr);
1470 
1471   /* write block size option to the viewer's .info file */
1472   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1477 {
1478   PetscErrorCode ierr;
1479   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1480   PetscInt       i,bs = A->rmap->bs,k;
1481 
1482   PetscFunctionBegin;
1483   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1484   for (i=0; i<a->mbs; i++) {
1485     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1486     for (k=a->i[i]; k<a->i[i+1]; k++) {
1487       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1488     }
1489     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1490   }
1491   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1496 {
1497   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1498   PetscErrorCode    ierr;
1499   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1500   PetscViewerFormat format;
1501 
1502   PetscFunctionBegin;
1503   if (A->structure_only) {
1504     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1505     PetscFunctionReturn(0);
1506   }
1507 
1508   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1509   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1510     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1511   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1512     const char *matname;
1513     Mat        aij;
1514     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1515     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1516     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1517     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1518     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520       PetscFunctionReturn(0);
1521   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1523     for (i=0; i<a->mbs; i++) {
1524       for (j=0; j<bs; j++) {
1525         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1526         for (k=a->i[i]; k<a->i[i+1]; k++) {
1527           for (l=0; l<bs; l++) {
1528 #if defined(PETSC_USE_COMPLEX)
1529             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1531                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1532             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1534                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             }
1538 #else
1539             if (a->a[bs2*k + l*bs + j] != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1541             }
1542 #endif
1543           }
1544         }
1545         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1546       }
1547     }
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1549   } else {
1550     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1551     for (i=0; i<a->mbs; i++) {
1552       for (j=0; j<bs; j++) {
1553         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1554         for (k=a->i[i]; k<a->i[i+1]; k++) {
1555           for (l=0; l<bs; l++) {
1556 #if defined(PETSC_USE_COMPLEX)
1557             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1559                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1560             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1562                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             }
1566 #else
1567             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1568 #endif
1569           }
1570         }
1571         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1572       }
1573     }
1574     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1575   }
1576   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #include <petscdraw.h>
1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582 {
1583   Mat               A = (Mat) Aa;
1584   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1585   PetscErrorCode    ierr;
1586   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1587   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588   MatScalar         *aa;
1589   PetscViewer       viewer;
1590   PetscViewerFormat format;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1594   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1595   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1596 
1597   /* loop over matrix elements drawing boxes */
1598 
1599   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1600     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1601     /* Blue for negative, Cyan for zero and  Red for positive */
1602     color = PETSC_DRAW_BLUE;
1603     for (i=0,row=0; i<mbs; i++,row+=bs) {
1604       for (j=a->i[i]; j<a->i[i+1]; j++) {
1605         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1606         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1607         aa  = a->a + j*bs2;
1608         for (k=0; k<bs; k++) {
1609           for (l=0; l<bs; l++) {
1610             if (PetscRealPart(*aa++) >=  0.) continue;
1611             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1612           }
1613         }
1614       }
1615     }
1616     color = PETSC_DRAW_CYAN;
1617     for (i=0,row=0; i<mbs; i++,row+=bs) {
1618       for (j=a->i[i]; j<a->i[i+1]; j++) {
1619         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1620         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621         aa  = a->a + j*bs2;
1622         for (k=0; k<bs; k++) {
1623           for (l=0; l<bs; l++) {
1624             if (PetscRealPart(*aa++) != 0.) continue;
1625             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1626           }
1627         }
1628       }
1629     }
1630     color = PETSC_DRAW_RED;
1631     for (i=0,row=0; i<mbs; i++,row+=bs) {
1632       for (j=a->i[i]; j<a->i[i+1]; j++) {
1633         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635         aa  = a->a + j*bs2;
1636         for (k=0; k<bs; k++) {
1637           for (l=0; l<bs; l++) {
1638             if (PetscRealPart(*aa++) <= 0.) continue;
1639             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640           }
1641         }
1642       }
1643     }
1644     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1645   } else {
1646     /* use contour shading to indicate magnitude of values */
1647     /* first determine max of all nonzero values */
1648     PetscReal minv = 0.0, maxv = 0.0;
1649     PetscDraw popup;
1650 
1651     for (i=0; i<a->nz*a->bs2; i++) {
1652       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1653     }
1654     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1655     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1656     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1657 
1658     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1659     for (i=0,row=0; i<mbs; i++,row+=bs) {
1660       for (j=a->i[i]; j<a->i[i+1]; j++) {
1661         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663         aa  = a->a + j*bs2;
1664         for (k=0; k<bs; k++) {
1665           for (l=0; l<bs; l++) {
1666             MatScalar v = *aa++;
1667             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1668             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1669           }
1670         }
1671       }
1672     }
1673     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679 {
1680   PetscErrorCode ierr;
1681   PetscReal      xl,yl,xr,yr,w,h;
1682   PetscDraw      draw;
1683   PetscBool      isnull;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1687   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1688   if (isnull) PetscFunctionReturn(0);
1689 
1690   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1691   xr  += w;          yr += h;        xl = -w;     yl = -h;
1692   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1693   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1694   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1695   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1696   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1701 {
1702   PetscErrorCode ierr;
1703   PetscBool      iascii,isbinary,isdraw;
1704 
1705   PetscFunctionBegin;
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1709   if (iascii) {
1710     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1711   } else if (isbinary) {
1712     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1713   } else if (isdraw) {
1714     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1715   } else {
1716     Mat B;
1717     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1718     ierr = MatView(B,viewer);CHKERRQ(ierr);
1719     ierr = MatDestroy(&B);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 
1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1726 {
1727   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1728   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1729   PetscInt    *ai = a->i,*ailen = a->ilen;
1730   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1731   MatScalar   *ap,*aa = a->a;
1732 
1733   PetscFunctionBegin;
1734   for (k=0; k<m; k++) { /* loop over rows */
1735     row = im[k]; brow = row/bs;
1736     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1737     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1738     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1739     nrow = ailen[brow];
1740     for (l=0; l<n; l++) { /* loop over columns */
1741       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1742       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1743       col  = in[l];
1744       bcol = col/bs;
1745       cidx = col%bs;
1746       ridx = row%bs;
1747       high = nrow;
1748       low  = 0; /* assume unsorted */
1749       while (high-low > 5) {
1750         t = (low+high)/2;
1751         if (rp[t] > bcol) high = t;
1752         else             low  = t;
1753       }
1754       for (i=low; i<high; i++) {
1755         if (rp[i] > bcol) break;
1756         if (rp[i] == bcol) {
1757           *v++ = ap[bs2*i+bs*cidx+ridx];
1758           goto finished;
1759         }
1760       }
1761       *v++ = 0.0;
1762 finished:;
1763     }
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1769 {
1770   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1771   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1772   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1773   PetscErrorCode    ierr;
1774   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1775   PetscBool         roworiented=a->roworiented;
1776   const PetscScalar *value     = v;
1777   MatScalar         *ap=NULL,*aa = a->a,*bap;
1778 
1779   PetscFunctionBegin;
1780   if (roworiented) {
1781     stepval = (n-1)*bs;
1782   } else {
1783     stepval = (m-1)*bs;
1784   }
1785   for (k=0; k<m; k++) { /* loop over added rows */
1786     row = im[k];
1787     if (row < 0) continue;
1788 #if defined(PETSC_USE_DEBUG)
1789     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1790 #endif
1791     rp   = aj + ai[row];
1792     if (!A->structure_only) ap = aa + bs2*ai[row];
1793     rmax = imax[row];
1794     nrow = ailen[row];
1795     low  = 0;
1796     high = nrow;
1797     for (l=0; l<n; l++) { /* loop over added columns */
1798       if (in[l] < 0) continue;
1799 #if defined(PETSC_USE_DEBUG)
1800       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1801 #endif
1802       col = in[l];
1803       if (!A->structure_only) {
1804         if (roworiented) {
1805           value = v + (k*(stepval+bs) + l)*bs;
1806         } else {
1807           value = v + (l*(stepval+bs) + k)*bs;
1808         }
1809       }
1810       if (col <= lastcol) low = 0;
1811       else high = nrow;
1812       lastcol = col;
1813       while (high-low > 7) {
1814         t = (low+high)/2;
1815         if (rp[t] > col) high = t;
1816         else             low  = t;
1817       }
1818       for (i=low; i<high; i++) {
1819         if (rp[i] > col) break;
1820         if (rp[i] == col) {
1821           if (A->structure_only) goto noinsert2;
1822           bap = ap +  bs2*i;
1823           if (roworiented) {
1824             if (is == ADD_VALUES) {
1825               for (ii=0; ii<bs; ii++,value+=stepval) {
1826                 for (jj=ii; jj<bs2; jj+=bs) {
1827                   bap[jj] += *value++;
1828                 }
1829               }
1830             } else {
1831               for (ii=0; ii<bs; ii++,value+=stepval) {
1832                 for (jj=ii; jj<bs2; jj+=bs) {
1833                   bap[jj] = *value++;
1834                 }
1835               }
1836             }
1837           } else {
1838             if (is == ADD_VALUES) {
1839               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1840                 for (jj=0; jj<bs; jj++) {
1841                   bap[jj] += value[jj];
1842                 }
1843                 bap += bs;
1844               }
1845             } else {
1846               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1847                 for (jj=0; jj<bs; jj++) {
1848                   bap[jj]  = value[jj];
1849                 }
1850                 bap += bs;
1851               }
1852             }
1853           }
1854           goto noinsert2;
1855         }
1856       }
1857       if (nonew == 1) goto noinsert2;
1858       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1859       if (A->structure_only) {
1860         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1861       } else {
1862         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1863       }
1864       N = nrow++ - 1; high++;
1865       /* shift up all the later entries in this row */
1866       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1867       rp[i] = col;
1868       if (!A->structure_only) {
1869         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1870         bap   = ap +  bs2*i;
1871         if (roworiented) {
1872           for (ii=0; ii<bs; ii++,value+=stepval) {
1873             for (jj=ii; jj<bs2; jj+=bs) {
1874               bap[jj] = *value++;
1875             }
1876           }
1877         } else {
1878           for (ii=0; ii<bs; ii++,value+=stepval) {
1879             for (jj=0; jj<bs; jj++) {
1880               *bap++ = *value++;
1881             }
1882           }
1883         }
1884       }
1885 noinsert2:;
1886       low = i;
1887     }
1888     ailen[row] = nrow;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1894 {
1895   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1896   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1897   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1898   PetscErrorCode ierr;
1899   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1900   MatScalar      *aa  = a->a,*ap;
1901   PetscReal      ratio=0.6;
1902 
1903   PetscFunctionBegin;
1904   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1905 
1906   if (m) rmax = ailen[0];
1907   for (i=1; i<mbs; i++) {
1908     /* move each row back by the amount of empty slots (fshift) before it*/
1909     fshift += imax[i-1] - ailen[i-1];
1910     rmax    = PetscMax(rmax,ailen[i]);
1911     if (fshift) {
1912       ip = aj + ai[i];
1913       ap = aa + bs2*ai[i];
1914       N  = ailen[i];
1915       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1916       if (!A->structure_only) {
1917         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1918       }
1919     }
1920     ai[i] = ai[i-1] + ailen[i-1];
1921   }
1922   if (mbs) {
1923     fshift += imax[mbs-1] - ailen[mbs-1];
1924     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1925   }
1926 
1927   /* reset ilen and imax for each row */
1928   a->nonzerorowcnt = 0;
1929   if (A->structure_only) {
1930     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1931   } else { /* !A->structure_only */
1932     for (i=0; i<mbs; i++) {
1933       ailen[i] = imax[i] = ai[i+1] - ai[i];
1934       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1935     }
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1947   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1950 
1951   A->info.mallocs    += a->reallocs;
1952   a->reallocs         = 0;
1953   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1954   a->rmax             = rmax;
1955 
1956   if (!A->structure_only) {
1957     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 /*
1963    This function returns an array of flags which indicate the locations of contiguous
1964    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1965    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966    Assume: sizes should be long enough to hold all the values.
1967 */
1968 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1969 {
1970   PetscInt  i,j,k,row;
1971   PetscBool flg;
1972 
1973   PetscFunctionBegin;
1974   for (i=0,j=0; i<n; j++) {
1975     row = idx[i];
1976     if (row%bs!=0) { /* Not the begining of a block */
1977       sizes[j] = 1;
1978       i++;
1979     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1980       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1981       i++;
1982     } else { /* Begining of the block, so check if the complete block exists */
1983       flg = PETSC_TRUE;
1984       for (k=1; k<bs; k++) {
1985         if (row+k != idx[i+k]) { /* break in the block */
1986           flg = PETSC_FALSE;
1987           break;
1988         }
1989       }
1990       if (flg) { /* No break in the bs */
1991         sizes[j] = bs;
1992         i       += bs;
1993       } else {
1994         sizes[j] = 1;
1995         i++;
1996       }
1997     }
1998   }
1999   *bs_max = j;
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2004 {
2005   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2006   PetscErrorCode    ierr;
2007   PetscInt          i,j,k,count,*rows;
2008   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2009   PetscScalar       zero = 0.0;
2010   MatScalar         *aa;
2011   const PetscScalar *xx;
2012   PetscScalar       *bb;
2013 
2014   PetscFunctionBegin;
2015   /* fix right hand side if needed */
2016   if (x && b) {
2017     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2018     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2019     for (i=0; i<is_n; i++) {
2020       bb[is_idx[i]] = diag*xx[is_idx[i]];
2021     }
2022     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2023     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2024   }
2025 
2026   /* Make a copy of the IS and  sort it */
2027   /* allocate memory for rows,sizes */
2028   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2029 
2030   /* copy IS values to rows, and sort them */
2031   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2032   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2033 
2034   if (baij->keepnonzeropattern) {
2035     for (i=0; i<is_n; i++) sizes[i] = 1;
2036     bs_max          = is_n;
2037   } else {
2038     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2039     A->nonzerostate++;
2040   }
2041 
2042   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2043     row = rows[j];
2044     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2045     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2046     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2047     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2048       if (diag != (PetscScalar)0.0) {
2049         if (baij->ilen[row/bs] > 0) {
2050           baij->ilen[row/bs]       = 1;
2051           baij->j[baij->i[row/bs]] = row/bs;
2052 
2053           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2054         }
2055         /* Now insert all the diagonal values for this bs */
2056         for (k=0; k<bs; k++) {
2057           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2058         }
2059       } else { /* (diag == 0.0) */
2060         baij->ilen[row/bs] = 0;
2061       } /* end (diag == 0.0) */
2062     } else { /* (sizes[i] != bs) */
2063 #if defined(PETSC_USE_DEBUG)
2064       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2065 #endif
2066       for (k=0; k<count; k++) {
2067         aa[0] =  zero;
2068         aa   += bs;
2069       }
2070       if (diag != (PetscScalar)0.0) {
2071         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072       }
2073     }
2074   }
2075 
2076   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2077   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2082 {
2083   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2084   PetscErrorCode    ierr;
2085   PetscInt          i,j,k,count;
2086   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2087   PetscScalar       zero = 0.0;
2088   MatScalar         *aa;
2089   const PetscScalar *xx;
2090   PetscScalar       *bb;
2091   PetscBool         *zeroed,vecs = PETSC_FALSE;
2092 
2093   PetscFunctionBegin;
2094   /* fix right hand side if needed */
2095   if (x && b) {
2096     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2097     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2098     vecs = PETSC_TRUE;
2099   }
2100 
2101   /* zero the columns */
2102   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2103   for (i=0; i<is_n; i++) {
2104     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2105     zeroed[is_idx[i]] = PETSC_TRUE;
2106   }
2107   for (i=0; i<A->rmap->N; i++) {
2108     if (!zeroed[i]) {
2109       row = i/bs;
2110       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2111         for (k=0; k<bs; k++) {
2112           col = bs*baij->j[j] + k;
2113           if (zeroed[col]) {
2114             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2115             if (vecs) bb[i] -= aa[0]*xx[col];
2116             aa[0] = 0.0;
2117           }
2118         }
2119       }
2120     } else if (vecs) bb[i] = diag*xx[i];
2121   }
2122   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2123   if (vecs) {
2124     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2125     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2126   }
2127 
2128   /* zero the rows */
2129   for (i=0; i<is_n; i++) {
2130     row   = is_idx[i];
2131     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2132     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2133     for (k=0; k<count; k++) {
2134       aa[0] =  zero;
2135       aa   += bs;
2136     }
2137     if (diag != (PetscScalar)0.0) {
2138       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2139     }
2140   }
2141   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2146 {
2147   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2148   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2149   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2150   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2151   PetscErrorCode ierr;
2152   PetscInt       ridx,cidx,bs2=a->bs2;
2153   PetscBool      roworiented=a->roworiented;
2154   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2155 
2156   PetscFunctionBegin;
2157   for (k=0; k<m; k++) { /* loop over added rows */
2158     row  = im[k];
2159     brow = row/bs;
2160     if (row < 0) continue;
2161 #if defined(PETSC_USE_DEBUG)
2162     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2163 #endif
2164     rp   = aj + ai[brow];
2165     if (!A->structure_only) ap = aa + bs2*ai[brow];
2166     rmax = imax[brow];
2167     nrow = ailen[brow];
2168     low  = 0;
2169     high = nrow;
2170     for (l=0; l<n; l++) { /* loop over added columns */
2171       if (in[l] < 0) continue;
2172 #if defined(PETSC_USE_DEBUG)
2173       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2174 #endif
2175       col  = in[l]; bcol = col/bs;
2176       ridx = row % bs; cidx = col % bs;
2177       if (!A->structure_only) {
2178         if (roworiented) {
2179           value = v[l + k*n];
2180         } else {
2181           value = v[k + l*m];
2182         }
2183       }
2184       if (col <= lastcol) low = 0; else high = nrow;
2185       lastcol = col;
2186       while (high-low > 7) {
2187         t = (low+high)/2;
2188         if (rp[t] > bcol) high = t;
2189         else              low  = t;
2190       }
2191       for (i=low; i<high; i++) {
2192         if (rp[i] > bcol) break;
2193         if (rp[i] == bcol) {
2194           bap = ap +  bs2*i + bs*cidx + ridx;
2195           if (!A->structure_only) {
2196             if (is == ADD_VALUES) *bap += value;
2197             else                  *bap  = value;
2198           }
2199           goto noinsert1;
2200         }
2201       }
2202       if (nonew == 1) goto noinsert1;
2203       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2204       if (A->structure_only) {
2205         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2206       } else {
2207         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2208       }
2209       N = nrow++ - 1; high++;
2210       /* shift up all the later entries in this row */
2211       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2212       rp[i] = bcol;
2213       if (!A->structure_only) {
2214         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2215         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2216         ap[bs2*i + bs*cidx + ridx] = value;
2217       }
2218       a->nz++;
2219       A->nonzerostate++;
2220 noinsert1:;
2221       low = i;
2222     }
2223     ailen[brow] = nrow;
2224   }
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2229 {
2230   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2231   Mat            outA;
2232   PetscErrorCode ierr;
2233   PetscBool      row_identity,col_identity;
2234 
2235   PetscFunctionBegin;
2236   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2237   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2238   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2239   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2240 
2241   outA            = inA;
2242   inA->factortype = MAT_FACTOR_LU;
2243   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2244   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2245 
2246   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2247 
2248   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2249   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2250   a->row = row;
2251   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2252   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2253   a->col = col;
2254 
2255   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2256   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2257   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2258   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2259 
2260   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2261   if (!a->solve_work) {
2262     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2263     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2264   }
2265   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2270 {
2271   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2272   PetscInt    i,nz,mbs;
2273 
2274   PetscFunctionBegin;
2275   nz  = baij->maxnz;
2276   mbs = baij->mbs;
2277   for (i=0; i<nz; i++) {
2278     baij->j[i] = indices[i];
2279   }
2280   baij->nz = nz;
2281   for (i=0; i<mbs; i++) {
2282     baij->ilen[i] = baij->imax[i];
2283   }
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 /*@
2288     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2289        in the matrix.
2290 
2291   Input Parameters:
2292 +  mat - the SeqBAIJ matrix
2293 -  indices - the column indices
2294 
2295   Level: advanced
2296 
2297   Notes:
2298     This can be called if you have precomputed the nonzero structure of the
2299   matrix and want to provide it to the matrix object to improve the performance
2300   of the MatSetValues() operation.
2301 
2302     You MUST have set the correct numbers of nonzeros per row in the call to
2303   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2304 
2305     MUST be called before any calls to MatSetValues();
2306 
2307 @*/
2308 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2314   PetscValidPointer(indices,2);
2315   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2320 {
2321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2322   PetscErrorCode ierr;
2323   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2324   PetscReal      atmp;
2325   PetscScalar    *x,zero = 0.0;
2326   MatScalar      *aa;
2327   PetscInt       ncols,brow,krow,kcol;
2328 
2329   PetscFunctionBegin;
2330   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2331   bs  = A->rmap->bs;
2332   aa  = a->a;
2333   ai  = a->i;
2334   aj  = a->j;
2335   mbs = a->mbs;
2336 
2337   ierr = VecSet(v,zero);CHKERRQ(ierr);
2338   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2339   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2340   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2341   for (i=0; i<mbs; i++) {
2342     ncols = ai[1] - ai[0]; ai++;
2343     brow  = bs*i;
2344     for (j=0; j<ncols; j++) {
2345       for (kcol=0; kcol<bs; kcol++) {
2346         for (krow=0; krow<bs; krow++) {
2347           atmp = PetscAbsScalar(*aa);aa++;
2348           row  = brow + krow;   /* row index */
2349           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2350         }
2351       }
2352       aj++;
2353     }
2354   }
2355   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2360 {
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   /* If the two matrices have the same copy implementation, use fast copy. */
2365   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2366     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2367     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2368     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2369 
2370     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2371     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2372     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2373     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2374   } else {
2375     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2376   }
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2381 {
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2390 {
2391   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2392 
2393   PetscFunctionBegin;
2394   *array = a->a;
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2399 {
2400   PetscFunctionBegin;
2401   *array = NULL;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406 {
2407   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2409   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   /* Set the number of nonzeros in the new matrix */
2414   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2419 {
2420   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2421   PetscErrorCode ierr;
2422   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2423   PetscBLASInt   one=1;
2424 
2425   PetscFunctionBegin;
2426   if (str == SAME_NONZERO_PATTERN) {
2427     PetscScalar  alpha = a;
2428     PetscBLASInt bnz;
2429     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2430     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2431     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2432   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2433     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2434   } else {
2435     Mat      B;
2436     PetscInt *nnz;
2437     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2438     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2439     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2440     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2441     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2442     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2443     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2444     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2445     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2446     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2447     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2448     ierr = PetscFree(nnz);CHKERRQ(ierr);
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2465 {
2466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2467   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2468   MatScalar   *aa = a->a;
2469 
2470   PetscFunctionBegin;
2471   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 /*
2476     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2477 */
2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2479 {
2480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2481   PetscErrorCode ierr;
2482   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2483   PetscInt       nz = a->i[m],row,*jj,mr,col;
2484 
2485   PetscFunctionBegin;
2486   *nn = n;
2487   if (!ia) PetscFunctionReturn(0);
2488   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2489   else {
2490     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2491     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2492     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2493     jj   = a->j;
2494     for (i=0; i<nz; i++) {
2495       collengths[jj[i]]++;
2496     }
2497     cia[0] = oshift;
2498     for (i=0; i<n; i++) {
2499       cia[i+1] = cia[i] + collengths[i];
2500     }
2501     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2502     jj   = a->j;
2503     for (row=0; row<m; row++) {
2504       mr = a->i[row+1] - a->i[row];
2505       for (i=0; i<mr; i++) {
2506         col = *jj++;
2507 
2508         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2509       }
2510     }
2511     ierr = PetscFree(collengths);CHKERRQ(ierr);
2512     *ia  = cia; *ja = cja;
2513   }
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2518 {
2519   PetscErrorCode ierr;
2520 
2521   PetscFunctionBegin;
2522   if (!ia) PetscFunctionReturn(0);
2523   ierr = PetscFree(*ia);CHKERRQ(ierr);
2524   ierr = PetscFree(*ja);CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 /*
2529  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2530  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2531  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2532  */
2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2534 {
2535   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2536   PetscErrorCode ierr;
2537   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2538   PetscInt       nz = a->i[m],row,*jj,mr,col;
2539   PetscInt       *cspidx;
2540 
2541   PetscFunctionBegin;
2542   *nn = n;
2543   if (!ia) PetscFunctionReturn(0);
2544 
2545   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2546   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2547   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2549   jj   = a->j;
2550   for (i=0; i<nz; i++) {
2551     collengths[jj[i]]++;
2552   }
2553   cia[0] = oshift;
2554   for (i=0; i<n; i++) {
2555     cia[i+1] = cia[i] + collengths[i];
2556   }
2557   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2558   jj   = a->j;
2559   for (row=0; row<m; row++) {
2560     mr = a->i[row+1] - a->i[row];
2561     for (i=0; i<mr; i++) {
2562       col = *jj++;
2563       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2564       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2565     }
2566   }
2567   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2568   *ia    = cia;
2569   *ja    = cja;
2570   *spidx = cspidx;
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2575 {
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2580   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2585 {
2586   PetscErrorCode ierr;
2587   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2588 
2589   PetscFunctionBegin;
2590   if (!Y->preallocated || !aij->nz) {
2591     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2592   }
2593   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /* -------------------------------------------------------------------*/
2598 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2599                                        MatGetRow_SeqBAIJ,
2600                                        MatRestoreRow_SeqBAIJ,
2601                                        MatMult_SeqBAIJ_N,
2602                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2603                                        MatMultTranspose_SeqBAIJ,
2604                                        MatMultTransposeAdd_SeqBAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                /* 10*/ 0,
2609                                        MatLUFactor_SeqBAIJ,
2610                                        0,
2611                                        0,
2612                                        MatTranspose_SeqBAIJ,
2613                                /* 15*/ MatGetInfo_SeqBAIJ,
2614                                        MatEqual_SeqBAIJ,
2615                                        MatGetDiagonal_SeqBAIJ,
2616                                        MatDiagonalScale_SeqBAIJ,
2617                                        MatNorm_SeqBAIJ,
2618                                /* 20*/ 0,
2619                                        MatAssemblyEnd_SeqBAIJ,
2620                                        MatSetOption_SeqBAIJ,
2621                                        MatZeroEntries_SeqBAIJ,
2622                                /* 24*/ MatZeroRows_SeqBAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                /* 29*/ MatSetUp_SeqBAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                /* 34*/ MatDuplicate_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        MatILUFactor_SeqBAIJ,
2636                                        0,
2637                                /* 39*/ MatAXPY_SeqBAIJ,
2638                                        MatCreateSubMatrices_SeqBAIJ,
2639                                        MatIncreaseOverlap_SeqBAIJ,
2640                                        MatGetValues_SeqBAIJ,
2641                                        MatCopy_SeqBAIJ,
2642                                /* 44*/ 0,
2643                                        MatScale_SeqBAIJ,
2644                                        MatShift_SeqBAIJ,
2645                                        0,
2646                                        MatZeroRowsColumns_SeqBAIJ,
2647                                /* 49*/ 0,
2648                                        MatGetRowIJ_SeqBAIJ,
2649                                        MatRestoreRowIJ_SeqBAIJ,
2650                                        MatGetColumnIJ_SeqBAIJ,
2651                                        MatRestoreColumnIJ_SeqBAIJ,
2652                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        MatSetValuesBlocked_SeqBAIJ,
2657                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2658                                        MatDestroy_SeqBAIJ,
2659                                        MatView_SeqBAIJ,
2660                                        0,
2661                                        0,
2662                                /* 64*/ 0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2668                                        0,
2669                                        MatConvert_Basic,
2670                                        0,
2671                                        0,
2672                                /* 74*/ 0,
2673                                        MatFDColoringApply_BAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                /* 79*/ 0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        MatLoad_SeqBAIJ,
2682                                /* 84*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 89*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 94*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 99*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /*104*/ 0,
2703                                        MatRealPart_SeqBAIJ,
2704                                        MatImaginaryPart_SeqBAIJ,
2705                                        0,
2706                                        0,
2707                                /*109*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatMissingDiagonal_SeqBAIJ,
2712                                /*114*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /*119*/ 0,
2718                                        0,
2719                                        MatMultHermitianTranspose_SeqBAIJ,
2720                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2721                                        0,
2722                                /*124*/ 0,
2723                                        0,
2724                                        MatInvertBlockDiagonal_SeqBAIJ,
2725                                        0,
2726                                        0,
2727                                /*129*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                /*134*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*139*/ MatSetBlockSizes_Default,
2738                                        0,
2739                                        0,
2740                                        MatFDColoringSetUp_SeqXAIJ,
2741                                        0,
2742                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2743                                        MatDestroySubMatrices_SeqBAIJ
2744 };
2745 
2746 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2747 {
2748   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2749   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2754 
2755   /* allocate space for values if not already there */
2756   if (!aij->saved_values) {
2757     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2758     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2759   }
2760 
2761   /* copy values over */
2762   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2767 {
2768   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769   PetscErrorCode ierr;
2770   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2771 
2772   PetscFunctionBegin;
2773   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2774   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2775 
2776   /* copy values over */
2777   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2783 
2784 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2785 {
2786   Mat_SeqBAIJ    *b;
2787   PetscErrorCode ierr;
2788   PetscInt       i,mbs,nbs,bs2;
2789   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2790 
2791   PetscFunctionBegin;
2792   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2793   if (nz == MAT_SKIP_ALLOCATION) {
2794     skipallocation = PETSC_TRUE;
2795     nz             = 0;
2796   }
2797 
2798   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2799   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2802 
2803   B->preallocated = PETSC_TRUE;
2804 
2805   mbs = B->rmap->n/bs;
2806   nbs = B->cmap->n/bs;
2807   bs2 = bs*bs;
2808 
2809   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2810 
2811   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2812   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2813   if (nnz) {
2814     for (i=0; i<mbs; i++) {
2815       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2816       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2817     }
2818   }
2819 
2820   b    = (Mat_SeqBAIJ*)B->data;
2821   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2822   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2823   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2824 
2825   if (!flg) {
2826     switch (bs) {
2827     case 1:
2828       B->ops->mult    = MatMult_SeqBAIJ_1;
2829       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2830       break;
2831     case 2:
2832       B->ops->mult    = MatMult_SeqBAIJ_2;
2833       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2834       break;
2835     case 3:
2836       B->ops->mult    = MatMult_SeqBAIJ_3;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2838       break;
2839     case 4:
2840       B->ops->mult    = MatMult_SeqBAIJ_4;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2842       break;
2843     case 5:
2844       B->ops->mult    = MatMult_SeqBAIJ_5;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2846       break;
2847     case 6:
2848       B->ops->mult    = MatMult_SeqBAIJ_6;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2850       break;
2851     case 7:
2852       B->ops->mult    = MatMult_SeqBAIJ_7;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2854       break;
2855     case 9:
2856 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2857       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2859 #else
2860       B->ops->mult    = MatMult_SeqBAIJ_N;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2862 #endif
2863       break;
2864     case 11:
2865       B->ops->mult    = MatMult_SeqBAIJ_11;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2867       break;
2868     case 15:
2869       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2871       break;
2872     default:
2873       B->ops->mult    = MatMult_SeqBAIJ_N;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875       break;
2876     }
2877   }
2878   B->ops->sor = MatSOR_SeqBAIJ;
2879   b->mbs = mbs;
2880   b->nbs = nbs;
2881   if (!skipallocation) {
2882     if (!b->imax) {
2883       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2884       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2885 
2886       b->free_imax_ilen = PETSC_TRUE;
2887     }
2888     /* b->ilen will count nonzeros in each block row so far. */
2889     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2890     if (!nnz) {
2891       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2892       else if (nz < 0) nz = 1;
2893       nz = PetscMin(nz,nbs);
2894       for (i=0; i<mbs; i++) b->imax[i] = nz;
2895       nz = nz*mbs;
2896     } else {
2897       PetscInt64 nz64 = 0;
2898       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2899       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2900     }
2901 
2902     /* allocate the matrix space */
2903     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2904     if (B->structure_only) {
2905       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2906       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2907       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2908     } else {
2909       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2910       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2911       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2912     }
2913     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2914 
2915     if (B->structure_only) {
2916       b->singlemalloc = PETSC_FALSE;
2917       b->free_a       = PETSC_FALSE;
2918     } else {
2919       b->singlemalloc = PETSC_TRUE;
2920       b->free_a       = PETSC_TRUE;
2921     }
2922     b->free_ij = PETSC_TRUE;
2923 
2924     b->i[0] = 0;
2925     for (i=1; i<mbs+1; i++) {
2926       b->i[i] = b->i[i-1] + b->imax[i-1];
2927     }
2928 
2929   } else {
2930     b->free_a  = PETSC_FALSE;
2931     b->free_ij = PETSC_FALSE;
2932   }
2933 
2934   b->bs2              = bs2;
2935   b->mbs              = mbs;
2936   b->nz               = 0;
2937   b->maxnz            = nz;
2938   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2939   B->was_assembled    = PETSC_FALSE;
2940   B->assembled        = PETSC_FALSE;
2941   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2946 {
2947   PetscInt       i,m,nz,nz_max=0,*nnz;
2948   PetscScalar    *values=0;
2949   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2954   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2959   m    = B->rmap->n/bs;
2960 
2961   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2962   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2963   for (i=0; i<m; i++) {
2964     nz = ii[i+1]- ii[i];
2965     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2966     nz_max = PetscMax(nz_max, nz);
2967     nnz[i] = nz;
2968   }
2969   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2970   ierr = PetscFree(nnz);CHKERRQ(ierr);
2971 
2972   values = (PetscScalar*)V;
2973   if (!values) {
2974     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2975   }
2976   for (i=0; i<m; i++) {
2977     PetscInt          ncols  = ii[i+1] - ii[i];
2978     const PetscInt    *icols = jj + ii[i];
2979     if (bs == 1 || !roworiented) {
2980       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2981       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2982     } else {
2983       PetscInt j;
2984       for (j=0; j<ncols; j++) {
2985         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2986         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2987       }
2988     }
2989   }
2990   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2991   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 /*@C
2998    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
2999 
3000    Not Collective
3001 
3002    Input Parameter:
3003 .  mat - a MATSEQBAIJ matrix
3004 
3005    Output Parameter:
3006 .   array - pointer to the data
3007 
3008    Level: intermediate
3009 
3010 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3011 @*/
3012 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3013 {
3014   PetscErrorCode ierr;
3015 
3016   PetscFunctionBegin;
3017   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 /*@C
3022    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3023 
3024    Not Collective
3025 
3026    Input Parameters:
3027 +  mat - a MATSEQBAIJ matrix
3028 -  array - pointer to the data
3029 
3030    Level: intermediate
3031 
3032 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3033 @*/
3034 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3035 {
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
3039   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 /*MC
3044    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3045    block sparse compressed row format.
3046 
3047    Options Database Keys:
3048 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3049 
3050    Level: beginner
3051 
3052    Notes:
3053     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3054     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3055 
3056 .seealso: MatCreateSeqBAIJ()
3057 M*/
3058 
3059 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3060 
3061 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3062 {
3063   PetscErrorCode ierr;
3064   PetscMPIInt    size;
3065   Mat_SeqBAIJ    *b;
3066 
3067   PetscFunctionBegin;
3068   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3069   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3070 
3071   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3072   B->data = (void*)b;
3073   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3074 
3075   b->row          = 0;
3076   b->col          = 0;
3077   b->icol         = 0;
3078   b->reallocs     = 0;
3079   b->saved_values = 0;
3080 
3081   b->roworiented        = PETSC_TRUE;
3082   b->nonew              = 0;
3083   b->diag               = 0;
3084   B->spptr              = 0;
3085   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3086   b->keepnonzeropattern = PETSC_FALSE;
3087 
3088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3099 #if defined(PETSC_HAVE_HYPRE)
3100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3101 #endif
3102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3104   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3109 {
3110   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3111   PetscErrorCode ierr;
3112   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3113 
3114   PetscFunctionBegin;
3115   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3116 
3117   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3118     c->imax           = a->imax;
3119     c->ilen           = a->ilen;
3120     c->free_imax_ilen = PETSC_FALSE;
3121   } else {
3122     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3123     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3124     for (i=0; i<mbs; i++) {
3125       c->imax[i] = a->imax[i];
3126       c->ilen[i] = a->ilen[i];
3127     }
3128     c->free_imax_ilen = PETSC_TRUE;
3129   }
3130 
3131   /* allocate the matrix space */
3132   if (mallocmatspace) {
3133     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3134       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3135       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3136 
3137       c->i            = a->i;
3138       c->j            = a->j;
3139       c->singlemalloc = PETSC_FALSE;
3140       c->free_a       = PETSC_TRUE;
3141       c->free_ij      = PETSC_FALSE;
3142       c->parent       = A;
3143       C->preallocated = PETSC_TRUE;
3144       C->assembled    = PETSC_TRUE;
3145 
3146       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3147       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3148       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3149     } else {
3150       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3151       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3152 
3153       c->singlemalloc = PETSC_TRUE;
3154       c->free_a       = PETSC_TRUE;
3155       c->free_ij      = PETSC_TRUE;
3156 
3157       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3158       if (mbs > 0) {
3159         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3160         if (cpvalues == MAT_COPY_VALUES) {
3161           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3162         } else {
3163           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3164         }
3165       }
3166       C->preallocated = PETSC_TRUE;
3167       C->assembled    = PETSC_TRUE;
3168     }
3169   }
3170 
3171   c->roworiented = a->roworiented;
3172   c->nonew       = a->nonew;
3173 
3174   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3175   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3176 
3177   c->bs2         = a->bs2;
3178   c->mbs         = a->mbs;
3179   c->nbs         = a->nbs;
3180 
3181   if (a->diag) {
3182     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3183       c->diag      = a->diag;
3184       c->free_diag = PETSC_FALSE;
3185     } else {
3186       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3187       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3188       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3189       c->free_diag = PETSC_TRUE;
3190     }
3191   } else c->diag = 0;
3192 
3193   c->nz         = a->nz;
3194   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3195   c->solve_work = NULL;
3196   c->mult_work  = NULL;
3197   c->sor_workt  = NULL;
3198   c->sor_work   = NULL;
3199 
3200   c->compressedrow.use   = a->compressedrow.use;
3201   c->compressedrow.nrows = a->compressedrow.nrows;
3202   if (a->compressedrow.use) {
3203     i    = a->compressedrow.nrows;
3204     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3205     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3206     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3207     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3208   } else {
3209     c->compressedrow.use    = PETSC_FALSE;
3210     c->compressedrow.i      = NULL;
3211     c->compressedrow.rindex = NULL;
3212   }
3213   C->nonzerostate = A->nonzerostate;
3214 
3215   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3220 {
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3225   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3226   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3227   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3232 {
3233   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3234   PetscInt       *rowidxs,*colidxs;
3235   PetscScalar    *matvals;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3240 
3241   /* read matrix header */
3242   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3243   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3244   M = header[1]; N = header[2]; nz = header[3];
3245   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3246   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3247   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3248 
3249   /* set block sizes from the viewer's .info file */
3250   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3251   /* set local and global sizes if not set already */
3252   if (mat->rmap->n < 0) mat->rmap->n = M;
3253   if (mat->cmap->n < 0) mat->cmap->n = N;
3254   if (mat->rmap->N < 0) mat->rmap->N = M;
3255   if (mat->cmap->N < 0) mat->cmap->N = N;
3256   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3257   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3258 
3259   /* check if the matrix sizes are correct */
3260   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3261   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3262   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3263   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3264   mbs = m/bs; nbs = n/bs;
3265 
3266   /* read in row lengths, column indices and nonzero values */
3267   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3268   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3269   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3270   sum = rowidxs[m];
3271   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3272 
3273   /* read in column indices and nonzero values */
3274   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3275   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3276   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3277 
3278   { /* preallocate matrix storage */
3279     PetscBT   bt; /* helper bit set to count nonzeros */
3280     PetscInt  *nnz;
3281 
3282     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3283     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3284     for (i=0; i<mbs; i++) {
3285       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3286       for (k=0; k<bs; k++)
3287         for (j=rowidxs[bs*i+k]; j<rowidxs[bs*i+k+1]; j++)
3288           if (!PetscBTLookupSet(bt,colidxs[j]/bs)) nnz[i]++;
3289     }
3290     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3291     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3292     ierr = PetscFree(nnz);CHKERRQ(ierr);
3293   }
3294 
3295   /* store matrix values */
3296   for (i=0; i<m; i++) {
3297     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3298     ierr = MatSetValues_SeqBAIJ(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3299   }
3300 
3301   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3302   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3303   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3304   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305   PetscFunctionReturn(0);
3306 }
3307 
3308 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3309 {
3310   PetscErrorCode ierr;
3311   PetscBool      isbinary;
3312 
3313   PetscFunctionBegin;
3314   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3315   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3316   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319 
3320 /*@C
3321    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3322    compressed row) format.  For good matrix assembly performance the
3323    user should preallocate the matrix storage by setting the parameter nz
3324    (or the array nnz).  By setting these parameters accurately, performance
3325    during matrix assembly can be increased by more than a factor of 50.
3326 
3327    Collective
3328 
3329    Input Parameters:
3330 +  comm - MPI communicator, set to PETSC_COMM_SELF
3331 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3332           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3333 .  m - number of rows
3334 .  n - number of columns
3335 .  nz - number of nonzero blocks  per block row (same for all rows)
3336 -  nnz - array containing the number of nonzero blocks in the various block rows
3337          (possibly different for each block row) or NULL
3338 
3339    Output Parameter:
3340 .  A - the matrix
3341 
3342    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3343    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3344    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3345 
3346    Options Database Keys:
3347 +   -mat_no_unroll - uses code that does not unroll the loops in the
3348                      block calculations (much slower)
3349 -    -mat_block_size - size of the blocks to use
3350 
3351    Level: intermediate
3352 
3353    Notes:
3354    The number of rows and columns must be divisible by blocksize.
3355 
3356    If the nnz parameter is given then the nz parameter is ignored
3357 
3358    A nonzero block is any block that as 1 or more nonzeros in it
3359 
3360    The block AIJ format is fully compatible with standard Fortran 77
3361    storage.  That is, the stored row and column indices can begin at
3362    either one (as in Fortran) or zero.  See the users' manual for details.
3363 
3364    Specify the preallocated storage with either nz or nnz (not both).
3365    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3366    allocation.  See Users-Manual: ch_mat for details.
3367    matrices.
3368 
3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3370 @*/
3371 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3372 {
3373   PetscErrorCode ierr;
3374 
3375   PetscFunctionBegin;
3376   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3377   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3378   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3379   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3380   PetscFunctionReturn(0);
3381 }
3382 
3383 /*@C
3384    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3385    per row in the matrix. For good matrix assembly performance the
3386    user should preallocate the matrix storage by setting the parameter nz
3387    (or the array nnz).  By setting these parameters accurately, performance
3388    during matrix assembly can be increased by more than a factor of 50.
3389 
3390    Collective
3391 
3392    Input Parameters:
3393 +  B - the matrix
3394 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3395           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3396 .  nz - number of block nonzeros per block row (same for all rows)
3397 -  nnz - array containing the number of block nonzeros in the various block rows
3398          (possibly different for each block row) or NULL
3399 
3400    Options Database Keys:
3401 +   -mat_no_unroll - uses code that does not unroll the loops in the
3402                      block calculations (much slower)
3403 -   -mat_block_size - size of the blocks to use
3404 
3405    Level: intermediate
3406 
3407    Notes:
3408    If the nnz parameter is given then the nz parameter is ignored
3409 
3410    You can call MatGetInfo() to get information on how effective the preallocation was;
3411    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3412    You can also run with the option -info and look for messages with the string
3413    malloc in them to see if additional memory allocation was needed.
3414 
3415    The block AIJ format is fully compatible with standard Fortran 77
3416    storage.  That is, the stored row and column indices can begin at
3417    either one (as in Fortran) or zero.  See the users' manual for details.
3418 
3419    Specify the preallocated storage with either nz or nnz (not both).
3420    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3421    allocation.  See Users-Manual: ch_mat for details.
3422 
3423 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3424 @*/
3425 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3426 {
3427   PetscErrorCode ierr;
3428 
3429   PetscFunctionBegin;
3430   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3431   PetscValidType(B,1);
3432   PetscValidLogicalCollectiveInt(B,bs,2);
3433   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 /*@C
3438    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3439 
3440    Collective
3441 
3442    Input Parameters:
3443 +  B - the matrix
3444 .  i - the indices into j for the start of each local row (starts with zero)
3445 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3446 -  v - optional values in the matrix
3447 
3448    Level: advanced
3449 
3450    Notes:
3451    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3452    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3453    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3454    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3455    block column and the second index is over columns within a block.
3456 
3457    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3458 
3459 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3460 @*/
3461 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3462 {
3463   PetscErrorCode ierr;
3464 
3465   PetscFunctionBegin;
3466   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3467   PetscValidType(B,1);
3468   PetscValidLogicalCollectiveInt(B,bs,2);
3469   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3470   PetscFunctionReturn(0);
3471 }
3472 
3473 
3474 /*@
3475      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3476 
3477      Collective
3478 
3479    Input Parameters:
3480 +  comm - must be an MPI communicator of size 1
3481 .  bs - size of block
3482 .  m - number of rows
3483 .  n - number of columns
3484 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3485 .  j - column indices
3486 -  a - matrix values
3487 
3488    Output Parameter:
3489 .  mat - the matrix
3490 
3491    Level: advanced
3492 
3493    Notes:
3494        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3495     once the matrix is destroyed
3496 
3497        You cannot set new nonzero locations into this matrix, that will generate an error.
3498 
3499        The i and j indices are 0 based
3500 
3501        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3502 
3503       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3504       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3505       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3506       with column-major ordering within blocks.
3507 
3508 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3509 
3510 @*/
3511 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3512 {
3513   PetscErrorCode ierr;
3514   PetscInt       ii;
3515   Mat_SeqBAIJ    *baij;
3516 
3517   PetscFunctionBegin;
3518   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3519   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3520 
3521   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3522   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3523   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3524   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3525   baij = (Mat_SeqBAIJ*)(*mat)->data;
3526   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3527   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3528 
3529   baij->i = i;
3530   baij->j = j;
3531   baij->a = a;
3532 
3533   baij->singlemalloc = PETSC_FALSE;
3534   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3535   baij->free_a       = PETSC_FALSE;
3536   baij->free_ij      = PETSC_FALSE;
3537 
3538   for (ii=0; ii<m; ii++) {
3539     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3540 #if defined(PETSC_USE_DEBUG)
3541     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3542 #endif
3543   }
3544 #if defined(PETSC_USE_DEBUG)
3545   for (ii=0; ii<baij->i[m]; ii++) {
3546     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3547     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3548   }
3549 #endif
3550 
3551   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3557 {
3558   PetscErrorCode ierr;
3559   PetscMPIInt    size;
3560 
3561   PetscFunctionBegin;
3562   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3563   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3564     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3565   } else {
3566     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3567   }
3568   PetscFunctionReturn(0);
3569 }
3570