xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e2606525cfbd3bdd7da360804ad5c3bf3ebbf4fb)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
23   PetscInt       m,n,i;
24   PetscInt       ib,jb,bs = A->rmap->bs;
25   MatScalar      *a_val = a_aij->a;
26 
27   PetscFunctionBegin;
28   PetscCall(MatGetSize(A,&m,&n));
29   for (i=0; i<n; i++) reductions[i] = 0.0;
30   if (type == NORM_2) {
31     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
32       for (jb=0; jb<bs; jb++) {
33         for (ib=0; ib<bs; ib++) {
34           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
35           a_val++;
36         }
37       }
38     }
39   } else if (type == NORM_1) {
40     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
41       for (jb=0; jb<bs; jb++) {
42         for (ib=0; ib<bs; ib++) {
43           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
44           a_val++;
45         }
46       }
47     }
48   } else if (type == NORM_INFINITY) {
49     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
50       for (jb=0; jb<bs; jb++) {
51         for (ib=0; ib<bs; ib++) {
52           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
53           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
54           a_val++;
55         }
56       }
57     }
58   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
60       for (jb=0; jb<bs; jb++) {
61         for (ib=0; ib<bs; ib++) {
62           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63           a_val++;
64         }
65       }
66     }
67   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
69       for (jb=0; jb<bs; jb++) {
70         for (ib=0; ib<bs; ib++) {
71           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72           a_val++;
73         }
74       }
75     }
76   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
77   if (type == NORM_2) {
78     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (i=0; i<n; i++) reductions[i] /= m;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
86 {
87   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
88   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
89   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
90   PetscReal      shift = 0.0;
91   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
92 
93   PetscFunctionBegin;
94   allowzeropivot = PetscNot(A->erroriffailure);
95 
96   if (a->idiagvalid) {
97     if (values) *values = a->idiag;
98     PetscFunctionReturn(0);
99   }
100   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101   diag_offset = a->diag;
102   if (!a->idiag) {
103     PetscCall(PetscMalloc1(bs2*mbs,&a->idiag));
104     PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar)));
105   }
106   diag  = a->idiag;
107   if (values) *values = a->idiag;
108   /* factor and invert each block */
109   switch (bs) {
110   case 1:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 1*diag_offset[i];
113       diag[0]  = odiag[0];
114 
115       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
116         if (allowzeropivot) {
117           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
119           A->factorerror_zeropivot_row   = i;
120           PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i));
121         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
122       }
123 
124       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
125       diag    += 1;
126     }
127     break;
128   case 2:
129     for (i=0; i<mbs; i++) {
130       odiag    = v + 4*diag_offset[i];
131       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
132       PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
133       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134       diag    += 4;
135     }
136     break;
137   case 3:
138     for (i=0; i<mbs; i++) {
139       odiag    = v + 9*diag_offset[i];
140       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
141       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
142       diag[8]  = odiag[8];
143       PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag    += 9;
146     }
147     break;
148   case 4:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 16*diag_offset[i];
151       PetscCall(PetscArraycpy(diag,odiag,16));
152       PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected));
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 16;
155     }
156     break;
157   case 5:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 25*diag_offset[i];
160       PetscCall(PetscArraycpy(diag,odiag,25));
161       PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 25;
164     }
165     break;
166   case 6:
167     for (i=0; i<mbs; i++) {
168       odiag  = v + 36*diag_offset[i];
169       PetscCall(PetscArraycpy(diag,odiag,36));
170       PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag  += 36;
173     }
174     break;
175   case 7:
176     for (i=0; i<mbs; i++) {
177       odiag  = v + 49*diag_offset[i];
178       PetscCall(PetscArraycpy(diag,odiag,49));
179       PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag  += 49;
182     }
183     break;
184   default:
185     PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots));
186     for (i=0; i<mbs; i++) {
187       odiag  = v + bs2*diag_offset[i];
188       PetscCall(PetscArraycpy(diag,odiag,bs2));
189       PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected));
190       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
191       diag  += bs2;
192     }
193     PetscCall(PetscFree2(v_work,v_pivots));
194   }
195   a->idiagvalid = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
200 {
201   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
202   PetscScalar       *x,*work,*w,*workt,*t;
203   const MatScalar   *v,*aa = a->a, *idiag;
204   const PetscScalar *b,*xb;
205   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
206   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
207   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
208 
209   PetscFunctionBegin;
210   its = its*lits;
211   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
212   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
213   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
214   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
215   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
216 
217   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL));
218 
219   if (!m) PetscFunctionReturn(0);
220   diag  = a->diag;
221   idiag = a->idiag;
222   k    = PetscMax(A->rmap->n,A->cmap->n);
223   if (!a->mult_work) {
224     PetscCall(PetscMalloc1(k+1,&a->mult_work));
225   }
226   if (!a->sor_workt) {
227     PetscCall(PetscMalloc1(k,&a->sor_workt));
228   }
229   if (!a->sor_work) {
230     PetscCall(PetscMalloc1(bs,&a->sor_work));
231   }
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx,&x));
237   PetscCall(VecGetArrayRead(bb,&b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
244         t[0] = b[0];
245         i2     = 1;
246         idiag += 1;
247         for (i=1; i<m; i++) {
248           v  = aa + ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j=0; j<nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
258           x[i2]  = xw[0];
259           idiag += 1;
260           i2    += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1];
266         i2     = 2;
267         idiag += 4;
268         for (i=1; i<m; i++) {
269           v  = aa + 4*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1];
273           for (j=0; j<nz; j++) {
274             idx = 2*vi[j];
275             it  = 4*j;
276             xw[0] = x[idx]; xw[1] = x[1+idx];
277             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
278           }
279           t[i2] = s[0]; t[i2+1] = s[1];
280           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
281           x[i2]   = xw[0]; x[i2+1] = xw[1];
282           idiag  += 4;
283           i2     += 2;
284         }
285         break;
286       case 3:
287         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
289         i2     = 3;
290         idiag += 9;
291         for (i=1; i<m; i++) {
292           v  = aa + 9*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
296           while (nz--) {
297             idx = 3*(*vi++);
298             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
300             v  += 9;
301           }
302           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
303           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
304           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
305           idiag  += 9;
306           i2     += 3;
307         }
308         break;
309       case 4:
310         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
311         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
312         i2     = 4;
313         idiag += 16;
314         for (i=1; i<m; i++) {
315           v  = aa + 16*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
319           while (nz--) {
320             idx = 4*(*vi++);
321             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
322             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
323             v  += 16;
324           }
325           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
326           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
328           idiag  += 16;
329           i2     += 4;
330         }
331         break;
332       case 5:
333         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
335         i2     = 5;
336         idiag += 25;
337         for (i=1; i<m; i++) {
338           v  = aa + 25*ai[i];
339           vi = aj + ai[i];
340           nz = diag[i] - ai[i];
341           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
342           while (nz--) {
343             idx = 5*(*vi++);
344             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
345             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
346             v  += 25;
347           }
348           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
349           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
350           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
351           idiag  += 25;
352           i2     += 5;
353         }
354         break;
355       case 6:
356         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
357         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
358         i2     = 6;
359         idiag += 36;
360         for (i=1; i<m; i++) {
361           v  = aa + 36*ai[i];
362           vi = aj + ai[i];
363           nz = diag[i] - ai[i];
364           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
365           while (nz--) {
366             idx = 6*(*vi++);
367             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
368             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
369             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
370             v  += 36;
371           }
372           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
373           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
374           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
375           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
376           idiag  += 36;
377           i2     += 6;
378         }
379         break;
380       case 7:
381         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
382         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
383         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
384         i2     = 7;
385         idiag += 49;
386         for (i=1; i<m; i++) {
387           v  = aa + 49*ai[i];
388           vi = aj + ai[i];
389           nz = diag[i] - ai[i];
390           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
391           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
392           while (nz--) {
393             idx = 7*(*vi++);
394             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
395             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
396             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
397             v  += 49;
398           }
399           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
400           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
401           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
402           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
403           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
404           idiag  += 49;
405           i2     += 7;
406         }
407         break;
408       default:
409         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
410         PetscCall(PetscArraycpy(t,b,bs));
411         i2     = bs;
412         idiag += bs2;
413         for (i=1; i<m; i++) {
414           v  = aa + bs2*ai[i];
415           vi = aj + ai[i];
416           nz = diag[i] - ai[i];
417 
418           PetscCall(PetscArraycpy(w,b+i2,bs));
419           /* copy all rows of x that are needed into contiguous space */
420           workt = work;
421           for (j=0; j<nz; j++) {
422             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
423             workt += bs;
424           }
425           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
426           PetscCall(PetscArraycpy(t+i2,w,bs));
427           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
428 
429           idiag += bs2;
430           i2    += bs;
431         }
432         break;
433       }
434       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
435       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
436       xb = t;
437     }
438     else xb = b;
439     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
440       idiag = a->idiag+bs2*(a->mbs-1);
441       i2 = bs * (m-1);
442       switch (bs) {
443       case 1:
444         s[0]  = xb[i2];
445         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
446         x[i2] = xw[0];
447         i2   -= 1;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + (diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2];
453           for (j=0; j<nz; j++) {
454             xw[0] = x[vi[j]];
455             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
456           }
457           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
458           x[i2]  = xw[0];
459           idiag -= 1;
460           i2    -= 1;
461         }
462         break;
463       case 2:
464         s[0]  = xb[i2]; s[1] = xb[i2+1];
465         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1];
467         i2    -= 2;
468         idiag -= 4;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 4*(diag[i] + 1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1];
474           for (j=0; j<nz; j++) {
475             idx = 2*vi[j];
476             it  = 4*j;
477             xw[0] = x[idx]; xw[1] = x[1+idx];
478             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
479           }
480           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
481           x[i2]   = xw[0]; x[i2+1] = xw[1];
482           idiag  -= 4;
483           i2     -= 2;
484         }
485         break;
486       case 3:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
488         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
490         i2    -= 3;
491         idiag -= 9;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 9*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
497           while (nz--) {
498             idx = 3*(*vi++);
499             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
501             v  += 9;
502           }
503           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
504           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
505           idiag  -= 9;
506           i2     -= 3;
507         }
508         break;
509       case 4:
510         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
511         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
512         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
513         i2    -= 4;
514         idiag -= 16;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 16*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
520           while (nz--) {
521             idx = 4*(*vi++);
522             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
523             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
524             v  += 16;
525           }
526           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
527           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
528           idiag  -= 16;
529           i2     -= 4;
530         }
531         break;
532       case 5:
533         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
534         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
535         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
536         i2    -= 5;
537         idiag -= 25;
538         for (i=m-2; i>=0; i--) {
539           v  = aa + 25*(diag[i]+1);
540           vi = aj + diag[i] + 1;
541           nz = ai[i+1] - diag[i] - 1;
542           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
543           while (nz--) {
544             idx = 5*(*vi++);
545             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
546             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
547             v  += 25;
548           }
549           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
550           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
551           idiag  -= 25;
552           i2     -= 5;
553         }
554         break;
555       case 6:
556         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
557         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
558         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
559         i2    -= 6;
560         idiag -= 36;
561         for (i=m-2; i>=0; i--) {
562           v  = aa + 36*(diag[i]+1);
563           vi = aj + diag[i] + 1;
564           nz = ai[i+1] - diag[i] - 1;
565           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
566           while (nz--) {
567             idx = 6*(*vi++);
568             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
569             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
570             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
571             v  += 36;
572           }
573           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
574           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
575           idiag  -= 36;
576           i2     -= 6;
577         }
578         break;
579       case 7:
580         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
581         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
582         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
583         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
584         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
585         i2    -= 7;
586         idiag -= 49;
587         for (i=m-2; i>=0; i--) {
588           v  = aa + 49*(diag[i]+1);
589           vi = aj + diag[i] + 1;
590           nz = ai[i+1] - diag[i] - 1;
591           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
592           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
593           while (nz--) {
594             idx = 7*(*vi++);
595             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
596             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
597             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
598             v  += 49;
599           }
600           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
601           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
602           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
603           idiag  -= 49;
604           i2     -= 7;
605         }
606         break;
607       default:
608         PetscCall(PetscArraycpy(w,xb+i2,bs));
609         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
610         i2    -= bs;
611         idiag -= bs2;
612         for (i=m-2; i>=0; i--) {
613           v  = aa + bs2*(diag[i]+1);
614           vi = aj + diag[i] + 1;
615           nz = ai[i+1] - diag[i] - 1;
616 
617           PetscCall(PetscArraycpy(w,xb+i2,bs));
618           /* copy all rows of x that are needed into contiguous space */
619           workt = work;
620           for (j=0; j<nz; j++) {
621             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
622             workt += bs;
623           }
624           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
625           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
626 
627           idiag -= bs2;
628           i2    -= bs;
629         }
630         break;
631       }
632       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
633     }
634     its--;
635   }
636   while (its--) {
637     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
638       idiag = a->idiag;
639       i2 = 0;
640       switch (bs) {
641       case 1:
642         for (i=0; i<m; i++) {
643           v  = aa + ai[i];
644           vi = aj + ai[i];
645           nz = ai[i+1] - ai[i];
646           s[0] = b[i2];
647           for (j=0; j<nz; j++) {
648             xw[0] = x[vi[j]];
649             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
650           }
651           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
652           x[i2] += xw[0];
653           idiag += 1;
654           i2    += 1;
655         }
656         break;
657       case 2:
658         for (i=0; i<m; i++) {
659           v  = aa + 4*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1];
663           for (j=0; j<nz; j++) {
664             idx = 2*vi[j];
665             it  = 4*j;
666             xw[0] = x[idx]; xw[1] = x[1+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
668           }
669           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
670           x[i2]  += xw[0]; x[i2+1] += xw[1];
671           idiag  += 4;
672           i2     += 2;
673         }
674         break;
675       case 3:
676         for (i=0; i<m; i++) {
677           v  = aa + 9*ai[i];
678           vi = aj + ai[i];
679           nz = ai[i+1] - ai[i];
680           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
681           while (nz--) {
682             idx = 3*(*vi++);
683             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
684             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
685             v  += 9;
686           }
687           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
688           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
689           idiag  += 9;
690           i2     += 3;
691         }
692         break;
693       case 4:
694         for (i=0; i<m; i++) {
695           v  = aa + 16*ai[i];
696           vi = aj + ai[i];
697           nz = ai[i+1] - ai[i];
698           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
699           while (nz--) {
700             idx = 4*(*vi++);
701             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
702             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
703             v  += 16;
704           }
705           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
706           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
707           idiag  += 16;
708           i2     += 4;
709         }
710         break;
711       case 5:
712         for (i=0; i<m; i++) {
713           v  = aa + 25*ai[i];
714           vi = aj + ai[i];
715           nz = ai[i+1] - ai[i];
716           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
717           while (nz--) {
718             idx = 5*(*vi++);
719             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
720             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
721             v  += 25;
722           }
723           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
724           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
725           idiag  += 25;
726           i2     += 5;
727         }
728         break;
729       case 6:
730         for (i=0; i<m; i++) {
731           v  = aa + 36*ai[i];
732           vi = aj + ai[i];
733           nz = ai[i+1] - ai[i];
734           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
735           while (nz--) {
736             idx = 6*(*vi++);
737             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
738             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
739             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
740             v  += 36;
741           }
742           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
743           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
744           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
745           idiag  += 36;
746           i2     += 6;
747         }
748         break;
749       case 7:
750         for (i=0; i<m; i++) {
751           v  = aa + 49*ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
755           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
756           while (nz--) {
757             idx = 7*(*vi++);
758             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
759             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
761             v  += 49;
762           }
763           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
764           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
765           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
766           idiag  += 49;
767           i2     += 7;
768         }
769         break;
770       default:
771         for (i=0; i<m; i++) {
772           v  = aa + bs2*ai[i];
773           vi = aj + ai[i];
774           nz = ai[i+1] - ai[i];
775 
776           PetscCall(PetscArraycpy(w,b+i2,bs));
777           /* copy all rows of x that are needed into contiguous space */
778           workt = work;
779           for (j=0; j<nz; j++) {
780             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
781             workt += bs;
782           }
783           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
784           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
785 
786           idiag += bs2;
787           i2    += bs;
788         }
789         break;
790       }
791       PetscCall(PetscLogFlops(2.0*bs2*a->nz));
792     }
793     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
794       idiag = a->idiag+bs2*(a->mbs-1);
795       i2 = bs * (m-1);
796       switch (bs) {
797       case 1:
798         for (i=m-1; i>=0; i--) {
799           v  = aa + ai[i];
800           vi = aj + ai[i];
801           nz = ai[i+1] - ai[i];
802           s[0] = b[i2];
803           for (j=0; j<nz; j++) {
804             xw[0] = x[vi[j]];
805             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
806           }
807           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
808           x[i2] += xw[0];
809           idiag -= 1;
810           i2    -= 1;
811         }
812         break;
813       case 2:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 4*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1];
819           for (j=0; j<nz; j++) {
820             idx = 2*vi[j];
821             it  = 4*j;
822             xw[0] = x[idx]; xw[1] = x[1+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
824           }
825           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
826           x[i2]  += xw[0]; x[i2+1] += xw[1];
827           idiag  -= 4;
828           i2     -= 2;
829         }
830         break;
831       case 3:
832         for (i=m-1; i>=0; i--) {
833           v  = aa + 9*ai[i];
834           vi = aj + ai[i];
835           nz = ai[i+1] - ai[i];
836           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
837           while (nz--) {
838             idx = 3*(*vi++);
839             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
840             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
841             v  += 9;
842           }
843           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
844           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
845           idiag  -= 9;
846           i2     -= 3;
847         }
848         break;
849       case 4:
850         for (i=m-1; i>=0; i--) {
851           v  = aa + 16*ai[i];
852           vi = aj + ai[i];
853           nz = ai[i+1] - ai[i];
854           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
855           while (nz--) {
856             idx = 4*(*vi++);
857             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
858             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
859             v  += 16;
860           }
861           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
862           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
863           idiag  -= 16;
864           i2     -= 4;
865         }
866         break;
867       case 5:
868         for (i=m-1; i>=0; i--) {
869           v  = aa + 25*ai[i];
870           vi = aj + ai[i];
871           nz = ai[i+1] - ai[i];
872           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
873           while (nz--) {
874             idx = 5*(*vi++);
875             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
876             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
877             v  += 25;
878           }
879           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
880           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
881           idiag  -= 25;
882           i2     -= 5;
883         }
884         break;
885       case 6:
886         for (i=m-1; i>=0; i--) {
887           v  = aa + 36*ai[i];
888           vi = aj + ai[i];
889           nz = ai[i+1] - ai[i];
890           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
891           while (nz--) {
892             idx = 6*(*vi++);
893             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
894             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
895             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
896             v  += 36;
897           }
898           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
899           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
900           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
901           idiag  -= 36;
902           i2     -= 6;
903         }
904         break;
905       case 7:
906         for (i=m-1; i>=0; i--) {
907           v  = aa + 49*ai[i];
908           vi = aj + ai[i];
909           nz = ai[i+1] - ai[i];
910           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
911           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
912           while (nz--) {
913             idx = 7*(*vi++);
914             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
915             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
916             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
917             v  += 49;
918           }
919           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
920           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
921           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
922           idiag  -= 49;
923           i2     -= 7;
924         }
925         break;
926       default:
927         for (i=m-1; i>=0; i--) {
928           v  = aa + bs2*ai[i];
929           vi = aj + ai[i];
930           nz = ai[i+1] - ai[i];
931 
932           PetscCall(PetscArraycpy(w,b+i2,bs));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
937             workt += bs;
938           }
939           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
940           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
941 
942           idiag -= bs2;
943           i2    -= bs;
944         }
945         break;
946       }
947       PetscCall(PetscLogFlops(2.0*bs2*(a->nz)));
948     }
949   }
950   PetscCall(VecRestoreArray(xx,&x));
951   PetscCall(VecRestoreArrayRead(bb,&b));
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
965 {
966   Mat               A  = *AA;
967   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
968   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
969   PetscInt          *ai    =a->i,*ailen=a->ilen;
970   PetscInt          *aj    =a->j,stepval,lastcol = -1;
971   const PetscScalar *value = v;
972   MatScalar         *ap,*aa = a->a,*bap;
973 
974   PetscFunctionBegin;
975   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
976   stepval = (n-1)*4;
977   for (k=0; k<m; k++) { /* loop over added rows */
978     row  = im[k];
979     rp   = aj + ai[row];
980     ap   = aa + 16*ai[row];
981     nrow = ailen[row];
982     low  = 0;
983     high = nrow;
984     for (l=0; l<n; l++) { /* loop over added columns */
985       col = in[l];
986       if (col <= lastcol)  low = 0;
987       else                high = nrow;
988       lastcol = col;
989       value   = v + k*(stepval+4 + l)*4;
990       while (high-low > 7) {
991         t = (low+high)/2;
992         if (rp[t] > col) high = t;
993         else             low  = t;
994       }
995       for (i=low; i<high; i++) {
996         if (rp[i] > col) break;
997         if (rp[i] == col) {
998           bap = ap +  16*i;
999           for (ii=0; ii<4; ii++,value+=stepval) {
1000             for (jj=ii; jj<16; jj+=4) {
1001               bap[jj] += *value++;
1002             }
1003           }
1004           goto noinsert2;
1005         }
1006       }
1007       N = nrow++ - 1;
1008       high++; /* added new column index thus must search to one higher than before */
1009       /* shift up all the later entries in this row */
1010       for (ii=N; ii>=i; ii--) {
1011         rp[ii+1] = rp[ii];
1012         PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16));
1013       }
1014       if (N >= i) {
1015         PetscCallVoid(PetscArrayzero(ap+16*i,16));
1016       }
1017       rp[i] = col;
1018       bap   = ap +  16*i;
1019       for (ii=0; ii<4; ii++,value+=stepval) {
1020         for (jj=ii; jj<16; jj+=4) {
1021           bap[jj] = *value++;
1022         }
1023       }
1024       noinsert2:;
1025       low = i;
1026     }
1027     ailen[row] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1033 #define matsetvalues4_ MATSETVALUES4
1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1035 #define matsetvalues4_ matsetvalues4
1036 #endif
1037 
1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1039 {
1040   Mat         A  = *AA;
1041   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1042   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1043   PetscInt    *ai=a->i,*ailen=a->ilen;
1044   PetscInt    *aj=a->j,brow,bcol;
1045   PetscInt    ridx,cidx,lastcol = -1;
1046   MatScalar   *ap,value,*aa=a->a,*bap;
1047 
1048   PetscFunctionBegin;
1049   for (k=0; k<m; k++) { /* loop over added rows */
1050     row  = im[k]; brow = row/4;
1051     rp   = aj + ai[brow];
1052     ap   = aa + 16*ai[brow];
1053     nrow = ailen[brow];
1054     low  = 0;
1055     high = nrow;
1056     for (l=0; l<n; l++) { /* loop over added columns */
1057       col   = in[l]; bcol = col/4;
1058       ridx  = row % 4; cidx = col % 4;
1059       value = v[l + k*n];
1060       if (col <= lastcol)  low = 0;
1061       else                high = nrow;
1062       lastcol = col;
1063       while (high-low > 7) {
1064         t = (low+high)/2;
1065         if (rp[t] > bcol) high = t;
1066         else              low  = t;
1067       }
1068       for (i=low; i<high; i++) {
1069         if (rp[i] > bcol) break;
1070         if (rp[i] == bcol) {
1071           bap   = ap +  16*i + 4*cidx + ridx;
1072           *bap += value;
1073           goto noinsert1;
1074         }
1075       }
1076       N = nrow++ - 1;
1077       high++; /* added new column thus must search to one higher than before */
1078       /* shift up all the later entries in this row */
1079       PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1));
1080       PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1)));
1081       PetscCallVoid(PetscArrayzero(ap+16*i,16));
1082       rp[i]                    = bcol;
1083       ap[16*i + 4*cidx + ridx] = value;
1084 noinsert1:;
1085       low = i;
1086     }
1087     ailen[brow] = nrow;
1088   }
1089   PetscFunctionReturnVoid();
1090 }
1091 
1092 /*
1093      Checks for missing diagonals
1094 */
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscInt       *diag,*ii = a->i,i;
1099 
1100   PetscFunctionBegin;
1101   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1102   *missing = PETSC_FALSE;
1103   if (A->rmap->n > 0 && !ii) {
1104     *missing = PETSC_TRUE;
1105     if (d) *d = 0;
1106     PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"));
1107   } else {
1108     PetscInt n;
1109     n = PetscMin(a->mbs, a->nbs);
1110     diag = a->diag;
1111     for (i=0; i<n; i++) {
1112       if (diag[i] >= ii[i+1]) {
1113         *missing = PETSC_TRUE;
1114         if (d) *d = i;
1115         PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i));
1116         break;
1117       }
1118     }
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscInt       i,j,m = a->mbs;
1127 
1128   PetscFunctionBegin;
1129   if (!a->diag) {
1130     PetscCall(PetscMalloc1(m,&a->diag));
1131     PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt)));
1132     a->free_diag = PETSC_TRUE;
1133   }
1134   for (i=0; i<m; i++) {
1135     a->diag[i] = a->i[i+1];
1136     for (j=a->i[i]; j<a->i[i+1]; j++) {
1137       if (a->j[j] == i) {
1138         a->diag[i] = j;
1139         break;
1140       }
1141     }
1142   }
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1147 {
1148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1149   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1150   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1151 
1152   PetscFunctionBegin;
1153   *nn = n;
1154   if (!ia) PetscFunctionReturn(0);
1155   if (symmetric) {
1156     PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja));
1157     nz   = tia[n];
1158   } else {
1159     tia = a->i; tja = a->j;
1160   }
1161 
1162   if (!blockcompressed && bs > 1) {
1163     (*nn) *= bs;
1164     /* malloc & create the natural set of indices */
1165     PetscCall(PetscMalloc1((n+1)*bs,ia));
1166     if (n) {
1167       (*ia)[0] = oshift;
1168       for (j=1; j<bs; j++) {
1169         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1170       }
1171     }
1172 
1173     for (i=1; i<n; i++) {
1174       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1175       for (j=1; j<bs; j++) {
1176         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1177       }
1178     }
1179     if (n) {
1180       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1181     }
1182 
1183     if (inja) {
1184       PetscCall(PetscMalloc1(nz*bs*bs,ja));
1185       cnt = 0;
1186       for (i=0; i<n; i++) {
1187         for (j=0; j<bs; j++) {
1188           for (k=tia[i]; k<tia[i+1]; k++) {
1189             for (l=0; l<bs; l++) {
1190               (*ja)[cnt++] = bs*tja[k] + l;
1191             }
1192           }
1193         }
1194       }
1195     }
1196 
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       PetscCall(PetscFree(tia));
1199       PetscCall(PetscFree(tja));
1200     }
1201   } else if (oshift == 1) {
1202     if (symmetric) {
1203       nz = tia[A->rmap->n/bs];
1204       /*  add 1 to i and j indices */
1205       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1206       *ia = tia;
1207       if (ja) {
1208         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1209         *ja = tja;
1210       }
1211     } else {
1212       nz = a->i[A->rmap->n/bs];
1213       /* malloc space and  add 1 to i and j indices */
1214       PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia));
1215       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1216       if (ja) {
1217         PetscCall(PetscMalloc1(nz,ja));
1218         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1219       }
1220     }
1221   } else {
1222     *ia = tia;
1223     if (ja) *ja = tja;
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1229 {
1230   PetscFunctionBegin;
1231   if (!ia) PetscFunctionReturn(0);
1232   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1233     PetscCall(PetscFree(*ia));
1234     if (ja) PetscCall(PetscFree(*ja));
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1240 {
1241   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1242 
1243   PetscFunctionBegin;
1244 #if defined(PETSC_USE_LOG)
1245   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1246 #endif
1247   PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i));
1248   PetscCall(ISDestroy(&a->row));
1249   PetscCall(ISDestroy(&a->col));
1250   if (a->free_diag) PetscCall(PetscFree(a->diag));
1251   PetscCall(PetscFree(a->idiag));
1252   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen));
1253   PetscCall(PetscFree(a->solve_work));
1254   PetscCall(PetscFree(a->mult_work));
1255   PetscCall(PetscFree(a->sor_workt));
1256   PetscCall(PetscFree(a->sor_work));
1257   PetscCall(ISDestroy(&a->icol));
1258   PetscCall(PetscFree(a->saved_values));
1259   PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex));
1260 
1261   PetscCall(MatDestroy(&a->sbaijMat));
1262   PetscCall(MatDestroy(&a->parent));
1263   PetscCall(PetscFree(A->data));
1264 
1265   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL));
1269   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL));
1270   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL));
1271   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL));
1272   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL));
1273   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL));
1277 #if defined(PETSC_HAVE_HYPRE)
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL));
1279 #endif
1280   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL));
1281   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1286 {
1287   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1288 
1289   PetscFunctionBegin;
1290   switch (op) {
1291   case MAT_ROW_ORIENTED:
1292     a->roworiented = flg;
1293     break;
1294   case MAT_KEEP_NONZERO_PATTERN:
1295     a->keepnonzeropattern = flg;
1296     break;
1297   case MAT_NEW_NONZERO_LOCATIONS:
1298     a->nonew = (flg ? 0 : 1);
1299     break;
1300   case MAT_NEW_NONZERO_LOCATION_ERR:
1301     a->nonew = (flg ? -1 : 0);
1302     break;
1303   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1304     a->nonew = (flg ? -2 : 0);
1305     break;
1306   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1307     a->nounused = (flg ? -1 : 0);
1308     break;
1309   case MAT_FORCE_DIAGONAL_ENTRIES:
1310   case MAT_IGNORE_OFF_PROC_ENTRIES:
1311   case MAT_USE_HASH_TABLE:
1312   case MAT_SORTED_FULL:
1313     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1314     break;
1315   case MAT_SPD:
1316   case MAT_SYMMETRIC:
1317   case MAT_STRUCTURALLY_SYMMETRIC:
1318   case MAT_HERMITIAN:
1319   case MAT_SYMMETRY_ETERNAL:
1320   case MAT_SUBMAT_SINGLEIS:
1321   case MAT_STRUCTURE_ONLY:
1322     /* These options are handled directly by MatSetOption() */
1323     break;
1324   default:
1325     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1326   }
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1331 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1332 {
1333   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1334   MatScalar      *aa_i;
1335   PetscScalar    *v_i;
1336 
1337   PetscFunctionBegin;
1338   bs  = A->rmap->bs;
1339   bs2 = bs*bs;
1340   PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
1341 
1342   bn  = row/bs;   /* Block number */
1343   bp  = row % bs; /* Block Position */
1344   M   = ai[bn+1] - ai[bn];
1345   *nz = bs*M;
1346 
1347   if (v) {
1348     *v = NULL;
1349     if (*nz) {
1350       PetscCall(PetscMalloc1(*nz,v));
1351       for (i=0; i<M; i++) { /* for each block in the block row */
1352         v_i  = *v + i*bs;
1353         aa_i = aa + bs2*(ai[bn] + i);
1354         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1355       }
1356     }
1357   }
1358 
1359   if (idx) {
1360     *idx = NULL;
1361     if (*nz) {
1362       PetscCall(PetscMalloc1(*nz,idx));
1363       for (i=0; i<M; i++) { /* for each block in the block row */
1364         idx_i = *idx + i*bs;
1365         itmp  = bs*aj[ai[bn] + i];
1366         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1367       }
1368     }
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1374 {
1375   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1376 
1377   PetscFunctionBegin;
1378   PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a));
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1383 {
1384   PetscFunctionBegin;
1385   if (nz)  *nz = 0;
1386   if (idx) PetscCall(PetscFree(*idx));
1387   if (v)   PetscCall(PetscFree(*v));
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1392 {
1393   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1394   Mat            C;
1395   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1396   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1397   MatScalar      *ata,*aa=a->a;
1398 
1399   PetscFunctionBegin;
1400   PetscCall(PetscCalloc1(1+nbs,&atfill));
1401   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1402     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1403 
1404     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
1405     PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N));
1406     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
1407     PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill));
1408 
1409     at  = (Mat_SeqBAIJ*)C->data;
1410     ati = at->i;
1411     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1412   } else {
1413     C = *B;
1414     at = (Mat_SeqBAIJ*)C->data;
1415     ati = at->i;
1416   }
1417 
1418   atj = at->j;
1419   ata = at->a;
1420 
1421   /* Copy ati into atfill so we have locations of the next free space in atj */
1422   PetscCall(PetscArraycpy(atfill,ati,nbs));
1423 
1424   /* Walk through A row-wise and mark nonzero entries of A^T. */
1425   for (i=0; i<mbs; i++) {
1426     anzj = ai[i+1] - ai[i];
1427     for (j=0; j<anzj; j++) {
1428       atj[atfill[*aj]] = i;
1429       for (kr=0; kr<bs; kr++) {
1430         for (k=0; k<bs; k++) {
1431           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1432         }
1433       }
1434       atfill[*aj++] += 1;
1435     }
1436   }
1437   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
1438   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
1439 
1440   /* Clean up temporary space and complete requests. */
1441   PetscCall(PetscFree(atfill));
1442 
1443   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1444     PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1445     *B = C;
1446   } else {
1447     PetscCall(MatHeaderMerge(A,&C));
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1453 {
1454   Mat            Btrans;
1455 
1456   PetscFunctionBegin;
1457   *f   = PETSC_FALSE;
1458   PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans));
1459   PetscCall(MatEqual_SeqBAIJ(B,Btrans,f));
1460   PetscCall(MatDestroy(&Btrans));
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1465 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1466 {
1467   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1468   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1469   PetscInt       *rowlens,*colidxs;
1470   PetscScalar    *matvals;
1471 
1472   PetscFunctionBegin;
1473   PetscCall(PetscViewerSetUp(viewer));
1474 
1475   M  = mat->rmap->N;
1476   N  = mat->cmap->N;
1477   m  = mat->rmap->n;
1478   bs = mat->rmap->bs;
1479   nz = bs*bs*A->nz;
1480 
1481   /* write matrix header */
1482   header[0] = MAT_FILE_CLASSID;
1483   header[1] = M; header[2] = N; header[3] = nz;
1484   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1485 
1486   /* store row lengths */
1487   PetscCall(PetscMalloc1(m,&rowlens));
1488   for (cnt=0, i=0; i<A->mbs; i++)
1489     for (j=0; j<bs; j++)
1490       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1491   PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT));
1492   PetscCall(PetscFree(rowlens));
1493 
1494   /* store column indices  */
1495   PetscCall(PetscMalloc1(nz,&colidxs));
1496   for (cnt=0, i=0; i<A->mbs; i++)
1497     for (k=0; k<bs; k++)
1498       for (j=A->i[i]; j<A->i[i+1]; j++)
1499         for (l=0; l<bs; l++)
1500           colidxs[cnt++] = bs*A->j[j] + l;
1501   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1502   PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT));
1503   PetscCall(PetscFree(colidxs));
1504 
1505   /* store nonzero values */
1506   PetscCall(PetscMalloc1(nz,&matvals));
1507   for (cnt=0, i=0; i<A->mbs; i++)
1508     for (k=0; k<bs; k++)
1509       for (j=A->i[i]; j<A->i[i+1]; j++)
1510         for (l=0; l<bs; l++)
1511           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1512   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1513   PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR));
1514   PetscCall(PetscFree(matvals));
1515 
1516   /* write block size option to the viewer's .info file */
1517   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1522 {
1523   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1524   PetscInt       i,bs = A->rmap->bs,k;
1525 
1526   PetscFunctionBegin;
1527   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1528   for (i=0; i<a->mbs; i++) {
1529     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1));
1530     for (k=a->i[i]; k<a->i[i+1]; k++) {
1531       PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1));
1532     }
1533     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1534   }
1535   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1540 {
1541   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1542   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1543   PetscViewerFormat format;
1544 
1545   PetscFunctionBegin;
1546   if (A->structure_only) {
1547     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer));
1548     PetscFunctionReturn(0);
1549   }
1550 
1551   PetscCall(PetscViewerGetFormat(viewer,&format));
1552   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1553     PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
1554   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1555     const char *matname;
1556     Mat        aij;
1557     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij));
1558     PetscCall(PetscObjectGetName((PetscObject)A,&matname));
1559     PetscCall(PetscObjectSetName((PetscObject)aij,matname));
1560     PetscCall(MatView(aij,viewer));
1561     PetscCall(MatDestroy(&aij));
1562   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1563       PetscFunctionReturn(0);
1564   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1565     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1566     for (i=0; i<a->mbs; i++) {
1567       for (j=0; j<bs; j++) {
1568         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1569         for (k=a->i[i]; k<a->i[i+1]; k++) {
1570           for (l=0; l<bs; l++) {
1571 #if defined(PETSC_USE_COMPLEX)
1572             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1574                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1575             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1576               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1577                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1578             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1579               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1580             }
1581 #else
1582             if (a->a[bs2*k + l*bs + j] != 0.0) {
1583               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1584             }
1585 #endif
1586           }
1587         }
1588         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1589       }
1590     }
1591     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1592   } else {
1593     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1594     for (i=0; i<a->mbs; i++) {
1595       for (j=0; j<bs; j++) {
1596         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1597         for (k=a->i[i]; k<a->i[i+1]; k++) {
1598           for (l=0; l<bs; l++) {
1599 #if defined(PETSC_USE_COMPLEX)
1600             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1601               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1602                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1603             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1604               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1605                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1606             } else {
1607               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1608             }
1609 #else
1610             PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1611 #endif
1612           }
1613         }
1614         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1615       }
1616     }
1617     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1618   }
1619   PetscCall(PetscViewerFlush(viewer));
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #include <petscdraw.h>
1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1625 {
1626   Mat               A = (Mat) Aa;
1627   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1628   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1629   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1630   MatScalar         *aa;
1631   PetscViewer       viewer;
1632   PetscViewerFormat format;
1633 
1634   PetscFunctionBegin;
1635   PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer));
1636   PetscCall(PetscViewerGetFormat(viewer,&format));
1637   PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr));
1638 
1639   /* loop over matrix elements drawing boxes */
1640 
1641   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1642     PetscDrawCollectiveBegin(draw);
1643     /* Blue for negative, Cyan for zero and  Red for positive */
1644     color = PETSC_DRAW_BLUE;
1645     for (i=0,row=0; i<mbs; i++,row+=bs) {
1646       for (j=a->i[i]; j<a->i[i+1]; j++) {
1647         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1648         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1649         aa  = a->a + j*bs2;
1650         for (k=0; k<bs; k++) {
1651           for (l=0; l<bs; l++) {
1652             if (PetscRealPart(*aa++) >=  0.) continue;
1653             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1654           }
1655         }
1656       }
1657     }
1658     color = PETSC_DRAW_CYAN;
1659     for (i=0,row=0; i<mbs; i++,row+=bs) {
1660       for (j=a->i[i]; j<a->i[i+1]; j++) {
1661         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663         aa  = a->a + j*bs2;
1664         for (k=0; k<bs; k++) {
1665           for (l=0; l<bs; l++) {
1666             if (PetscRealPart(*aa++) != 0.) continue;
1667             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1668           }
1669         }
1670       }
1671     }
1672     color = PETSC_DRAW_RED;
1673     for (i=0,row=0; i<mbs; i++,row+=bs) {
1674       for (j=a->i[i]; j<a->i[i+1]; j++) {
1675         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1676         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1677         aa  = a->a + j*bs2;
1678         for (k=0; k<bs; k++) {
1679           for (l=0; l<bs; l++) {
1680             if (PetscRealPart(*aa++) <= 0.) continue;
1681             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1682           }
1683         }
1684       }
1685     }
1686     PetscDrawCollectiveEnd(draw);
1687   } else {
1688     /* use contour shading to indicate magnitude of values */
1689     /* first determine max of all nonzero values */
1690     PetscReal      minv = 0.0, maxv = 0.0;
1691     PetscDraw      popup;
1692 
1693     for (i=0; i<a->nz*a->bs2; i++) {
1694       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1695     }
1696     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1697     PetscCall(PetscDrawGetPopup(draw,&popup));
1698     PetscCall(PetscDrawScalePopup(popup,0.0,maxv));
1699 
1700     PetscDrawCollectiveBegin(draw);
1701     for (i=0,row=0; i<mbs; i++,row+=bs) {
1702       for (j=a->i[i]; j<a->i[i+1]; j++) {
1703         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1704         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1705         aa  = a->a + j*bs2;
1706         for (k=0; k<bs; k++) {
1707           for (l=0; l<bs; l++) {
1708             MatScalar v = *aa++;
1709             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1710             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1711           }
1712         }
1713       }
1714     }
1715     PetscDrawCollectiveEnd(draw);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1721 {
1722   PetscReal      xl,yl,xr,yr,w,h;
1723   PetscDraw      draw;
1724   PetscBool      isnull;
1725 
1726   PetscFunctionBegin;
1727   PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1728   PetscCall(PetscDrawIsNull(draw,&isnull));
1729   if (isnull) PetscFunctionReturn(0);
1730 
1731   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1732   xr  += w;          yr += h;        xl = -w;     yl = -h;
1733   PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr));
1734   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer));
1735   PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A));
1736   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL));
1737   PetscCall(PetscDrawSave(draw));
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1742 {
1743   PetscBool      iascii,isbinary,isdraw;
1744 
1745   PetscFunctionBegin;
1746   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1747   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1748   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1749   if (iascii) {
1750     PetscCall(MatView_SeqBAIJ_ASCII(A,viewer));
1751   } else if (isbinary) {
1752     PetscCall(MatView_SeqBAIJ_Binary(A,viewer));
1753   } else if (isdraw) {
1754     PetscCall(MatView_SeqBAIJ_Draw(A,viewer));
1755   } else {
1756     Mat B;
1757     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1758     PetscCall(MatView(B,viewer));
1759     PetscCall(MatDestroy(&B));
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1765 {
1766   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1767   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1768   PetscInt    *ai = a->i,*ailen = a->ilen;
1769   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1770   MatScalar   *ap,*aa = a->a;
1771 
1772   PetscFunctionBegin;
1773   for (k=0; k<m; k++) { /* loop over rows */
1774     row = im[k]; brow = row/bs;
1775     if (row < 0) {v += n; continue;} /* negative row */
1776     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
1777     rp   = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1778     ap   = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1779     nrow = ailen[brow];
1780     for (l=0; l<n; l++) { /* loop over columns */
1781       if (in[l] < 0) {v++; continue;} /* negative column */
1782       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
1783       col  = in[l];
1784       bcol = col/bs;
1785       cidx = col%bs;
1786       ridx = row%bs;
1787       high = nrow;
1788       low  = 0; /* assume unsorted */
1789       while (high-low > 5) {
1790         t = (low+high)/2;
1791         if (rp[t] > bcol) high = t;
1792         else             low  = t;
1793       }
1794       for (i=low; i<high; i++) {
1795         if (rp[i] > bcol) break;
1796         if (rp[i] == bcol) {
1797           *v++ = ap[bs2*i+bs*cidx+ridx];
1798           goto finished;
1799         }
1800       }
1801       *v++ = 0.0;
1802 finished:;
1803     }
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1809 {
1810   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1811   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1812   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1813   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1814   PetscBool         roworiented=a->roworiented;
1815   const PetscScalar *value     = v;
1816   MatScalar         *ap=NULL,*aa = a->a,*bap;
1817 
1818   PetscFunctionBegin;
1819   if (roworiented) {
1820     stepval = (n-1)*bs;
1821   } else {
1822     stepval = (m-1)*bs;
1823   }
1824   for (k=0; k<m; k++) { /* loop over added rows */
1825     row = im[k];
1826     if (row < 0) continue;
1827     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
1828     rp   = aj + ai[row];
1829     if (!A->structure_only) ap = aa + bs2*ai[row];
1830     rmax = imax[row];
1831     nrow = ailen[row];
1832     low  = 0;
1833     high = nrow;
1834     for (l=0; l<n; l++) { /* loop over added columns */
1835       if (in[l] < 0) continue;
1836       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
1837       col = in[l];
1838       if (!A->structure_only) {
1839         if (roworiented) {
1840           value = v + (k*(stepval+bs) + l)*bs;
1841         } else {
1842           value = v + (l*(stepval+bs) + k)*bs;
1843         }
1844       }
1845       if (col <= lastcol) low = 0;
1846       else high = nrow;
1847       lastcol = col;
1848       while (high-low > 7) {
1849         t = (low+high)/2;
1850         if (rp[t] > col) high = t;
1851         else             low  = t;
1852       }
1853       for (i=low; i<high; i++) {
1854         if (rp[i] > col) break;
1855         if (rp[i] == col) {
1856           if (A->structure_only) goto noinsert2;
1857           bap = ap +  bs2*i;
1858           if (roworiented) {
1859             if (is == ADD_VALUES) {
1860               for (ii=0; ii<bs; ii++,value+=stepval) {
1861                 for (jj=ii; jj<bs2; jj+=bs) {
1862                   bap[jj] += *value++;
1863                 }
1864               }
1865             } else {
1866               for (ii=0; ii<bs; ii++,value+=stepval) {
1867                 for (jj=ii; jj<bs2; jj+=bs) {
1868                   bap[jj] = *value++;
1869                 }
1870               }
1871             }
1872           } else {
1873             if (is == ADD_VALUES) {
1874               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1875                 for (jj=0; jj<bs; jj++) {
1876                   bap[jj] += value[jj];
1877                 }
1878                 bap += bs;
1879               }
1880             } else {
1881               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1882                 for (jj=0; jj<bs; jj++) {
1883                   bap[jj]  = value[jj];
1884                 }
1885                 bap += bs;
1886               }
1887             }
1888           }
1889           goto noinsert2;
1890         }
1891       }
1892       if (nonew == 1) goto noinsert2;
1893       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1894       if (A->structure_only) {
1895         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1896       } else {
1897         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1898       }
1899       N = nrow++ - 1; high++;
1900       /* shift up all the later entries in this row */
1901       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
1902       rp[i] = col;
1903       if (!A->structure_only) {
1904         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
1905         bap   = ap +  bs2*i;
1906         if (roworiented) {
1907           for (ii=0; ii<bs; ii++,value+=stepval) {
1908             for (jj=ii; jj<bs2; jj+=bs) {
1909               bap[jj] = *value++;
1910             }
1911           }
1912         } else {
1913           for (ii=0; ii<bs; ii++,value+=stepval) {
1914             for (jj=0; jj<bs; jj++) {
1915               *bap++ = *value++;
1916             }
1917           }
1918         }
1919       }
1920 noinsert2:;
1921       low = i;
1922     }
1923     ailen[row] = nrow;
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1929 {
1930   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1931   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1932   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1933   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1934   MatScalar      *aa  = a->a,*ap;
1935   PetscReal      ratio=0.6;
1936 
1937   PetscFunctionBegin;
1938   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1939 
1940   if (m) rmax = ailen[0];
1941   for (i=1; i<mbs; i++) {
1942     /* move each row back by the amount of empty slots (fshift) before it*/
1943     fshift += imax[i-1] - ailen[i-1];
1944     rmax    = PetscMax(rmax,ailen[i]);
1945     if (fshift) {
1946       ip = aj + ai[i];
1947       ap = aa + bs2*ai[i];
1948       N  = ailen[i];
1949       PetscCall(PetscArraymove(ip-fshift,ip,N));
1950       if (!A->structure_only) {
1951         PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N));
1952       }
1953     }
1954     ai[i] = ai[i-1] + ailen[i-1];
1955   }
1956   if (mbs) {
1957     fshift += imax[mbs-1] - ailen[mbs-1];
1958     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1959   }
1960 
1961   /* reset ilen and imax for each row */
1962   a->nonzerorowcnt = 0;
1963   if (A->structure_only) {
1964     PetscCall(PetscFree2(a->imax,a->ilen));
1965   } else { /* !A->structure_only */
1966     for (i=0; i<mbs; i++) {
1967       ailen[i] = imax[i] = ai[i+1] - ai[i];
1968       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1969     }
1970   }
1971   a->nz = ai[mbs];
1972 
1973   /* diagonals may have moved, so kill the diagonal pointers */
1974   a->idiagvalid = PETSC_FALSE;
1975   if (fshift && a->diag) {
1976     PetscCall(PetscFree(a->diag));
1977     PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt)));
1978     a->diag = NULL;
1979   }
1980   if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1981   PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2));
1982   PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs));
1983   PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax));
1984 
1985   A->info.mallocs    += a->reallocs;
1986   a->reallocs         = 0;
1987   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1988   a->rmax             = rmax;
1989 
1990   if (!A->structure_only) {
1991     PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio));
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 /*
1997    This function returns an array of flags which indicate the locations of contiguous
1998    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1999    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2000    Assume: sizes should be long enough to hold all the values.
2001 */
2002 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2003 {
2004   PetscInt  i,j,k,row;
2005   PetscBool flg;
2006 
2007   PetscFunctionBegin;
2008   for (i=0,j=0; i<n; j++) {
2009     row = idx[i];
2010     if (row%bs!=0) { /* Not the beginning of a block */
2011       sizes[j] = 1;
2012       i++;
2013     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2014       sizes[j] = 1;         /* Also makes sure at least 'bs' values exist for next else */
2015       i++;
2016     } else { /* Beginning of the block, so check if the complete block exists */
2017       flg = PETSC_TRUE;
2018       for (k=1; k<bs; k++) {
2019         if (row+k != idx[i+k]) { /* break in the block */
2020           flg = PETSC_FALSE;
2021           break;
2022         }
2023       }
2024       if (flg) { /* No break in the bs */
2025         sizes[j] = bs;
2026         i       += bs;
2027       } else {
2028         sizes[j] = 1;
2029         i++;
2030       }
2031     }
2032   }
2033   *bs_max = j;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2038 {
2039   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2040   PetscInt          i,j,k,count,*rows;
2041   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2042   PetscScalar       zero = 0.0;
2043   MatScalar         *aa;
2044   const PetscScalar *xx;
2045   PetscScalar       *bb;
2046 
2047   PetscFunctionBegin;
2048   /* fix right hand side if needed */
2049   if (x && b) {
2050     PetscCall(VecGetArrayRead(x,&xx));
2051     PetscCall(VecGetArray(b,&bb));
2052     for (i=0; i<is_n; i++) {
2053       bb[is_idx[i]] = diag*xx[is_idx[i]];
2054     }
2055     PetscCall(VecRestoreArrayRead(x,&xx));
2056     PetscCall(VecRestoreArray(b,&bb));
2057   }
2058 
2059   /* Make a copy of the IS and  sort it */
2060   /* allocate memory for rows,sizes */
2061   PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes));
2062 
2063   /* copy IS values to rows, and sort them */
2064   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2065   PetscCall(PetscSortInt(is_n,rows));
2066 
2067   if (baij->keepnonzeropattern) {
2068     for (i=0; i<is_n; i++) sizes[i] = 1;
2069     bs_max          = is_n;
2070   } else {
2071     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max));
2072     A->nonzerostate++;
2073   }
2074 
2075   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2076     row = rows[j];
2077     PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2078     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2079     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2080     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2081       if (diag != (PetscScalar)0.0) {
2082         if (baij->ilen[row/bs] > 0) {
2083           baij->ilen[row/bs]       = 1;
2084           baij->j[baij->i[row/bs]] = row/bs;
2085 
2086           PetscCall(PetscArrayzero(aa,count*bs));
2087         }
2088         /* Now insert all the diagonal values for this bs */
2089         for (k=0; k<bs; k++) {
2090           PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES));
2091         }
2092       } else { /* (diag == 0.0) */
2093         baij->ilen[row/bs] = 0;
2094       } /* end (diag == 0.0) */
2095     } else { /* (sizes[i] != bs) */
2096       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2097       for (k=0; k<count; k++) {
2098         aa[0] =  zero;
2099         aa   += bs;
2100       }
2101       if (diag != (PetscScalar)0.0) {
2102         PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES));
2103       }
2104     }
2105   }
2106 
2107   PetscCall(PetscFree2(rows,sizes));
2108   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2113 {
2114   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2115   PetscInt          i,j,k,count;
2116   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2117   PetscScalar       zero = 0.0;
2118   MatScalar         *aa;
2119   const PetscScalar *xx;
2120   PetscScalar       *bb;
2121   PetscBool         *zeroed,vecs = PETSC_FALSE;
2122 
2123   PetscFunctionBegin;
2124   /* fix right hand side if needed */
2125   if (x && b) {
2126     PetscCall(VecGetArrayRead(x,&xx));
2127     PetscCall(VecGetArray(b,&bb));
2128     vecs = PETSC_TRUE;
2129   }
2130 
2131   /* zero the columns */
2132   PetscCall(PetscCalloc1(A->rmap->n,&zeroed));
2133   for (i=0; i<is_n; i++) {
2134     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
2135     zeroed[is_idx[i]] = PETSC_TRUE;
2136   }
2137   for (i=0; i<A->rmap->N; i++) {
2138     if (!zeroed[i]) {
2139       row = i/bs;
2140       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2141         for (k=0; k<bs; k++) {
2142           col = bs*baij->j[j] + k;
2143           if (zeroed[col]) {
2144             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2145             if (vecs) bb[i] -= aa[0]*xx[col];
2146             aa[0] = 0.0;
2147           }
2148         }
2149       }
2150     } else if (vecs) bb[i] = diag*xx[i];
2151   }
2152   PetscCall(PetscFree(zeroed));
2153   if (vecs) {
2154     PetscCall(VecRestoreArrayRead(x,&xx));
2155     PetscCall(VecRestoreArray(b,&bb));
2156   }
2157 
2158   /* zero the rows */
2159   for (i=0; i<is_n; i++) {
2160     row   = is_idx[i];
2161     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2162     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2163     for (k=0; k<count; k++) {
2164       aa[0] =  zero;
2165       aa   += bs;
2166     }
2167     if (diag != (PetscScalar)0.0) {
2168       PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES));
2169     }
2170   }
2171   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2176 {
2177   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2178   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2179   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2180   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2181   PetscInt       ridx,cidx,bs2=a->bs2;
2182   PetscBool      roworiented=a->roworiented;
2183   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2184 
2185   PetscFunctionBegin;
2186   for (k=0; k<m; k++) { /* loop over added rows */
2187     row  = im[k];
2188     brow = row/bs;
2189     if (row < 0) continue;
2190     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
2191     rp   = aj + ai[brow];
2192     if (!A->structure_only) ap = aa + bs2*ai[brow];
2193     rmax = imax[brow];
2194     nrow = ailen[brow];
2195     low  = 0;
2196     high = nrow;
2197     for (l=0; l<n; l++) { /* loop over added columns */
2198       if (in[l] < 0) continue;
2199       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
2200       col  = in[l]; bcol = col/bs;
2201       ridx = row % bs; cidx = col % bs;
2202       if (!A->structure_only) {
2203         if (roworiented) {
2204           value = v[l + k*n];
2205         } else {
2206           value = v[k + l*m];
2207         }
2208       }
2209       if (col <= lastcol) low = 0; else high = nrow;
2210       lastcol = col;
2211       while (high-low > 7) {
2212         t = (low+high)/2;
2213         if (rp[t] > bcol) high = t;
2214         else              low  = t;
2215       }
2216       for (i=low; i<high; i++) {
2217         if (rp[i] > bcol) break;
2218         if (rp[i] == bcol) {
2219           bap = ap +  bs2*i + bs*cidx + ridx;
2220           if (!A->structure_only) {
2221             if (is == ADD_VALUES) *bap += value;
2222             else                  *bap  = value;
2223           }
2224           goto noinsert1;
2225         }
2226       }
2227       if (nonew == 1) goto noinsert1;
2228       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2229       if (A->structure_only) {
2230         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2231       } else {
2232         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2233       }
2234       N = nrow++ - 1; high++;
2235       /* shift up all the later entries in this row */
2236       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
2237       rp[i] = bcol;
2238       if (!A->structure_only) {
2239         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
2240         PetscCall(PetscArrayzero(ap+bs2*i,bs2));
2241         ap[bs2*i + bs*cidx + ridx] = value;
2242       }
2243       a->nz++;
2244       A->nonzerostate++;
2245 noinsert1:;
2246       low = i;
2247     }
2248     ailen[brow] = nrow;
2249   }
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2254 {
2255   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2256   Mat            outA;
2257   PetscBool      row_identity,col_identity;
2258 
2259   PetscFunctionBegin;
2260   PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2261   PetscCall(ISIdentity(row,&row_identity));
2262   PetscCall(ISIdentity(col,&col_identity));
2263   PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2264 
2265   outA            = inA;
2266   inA->factortype = MAT_FACTOR_LU;
2267   PetscCall(PetscFree(inA->solvertype));
2268   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype));
2269 
2270   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2271 
2272   PetscCall(PetscObjectReference((PetscObject)row));
2273   PetscCall(ISDestroy(&a->row));
2274   a->row = row;
2275   PetscCall(PetscObjectReference((PetscObject)col));
2276   PetscCall(ISDestroy(&a->col));
2277   a->col = col;
2278 
2279   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2280   PetscCall(ISDestroy(&a->icol));
2281   PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol));
2282   PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol));
2283 
2284   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity)));
2285   if (!a->solve_work) {
2286     PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work));
2287     PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar)));
2288   }
2289   PetscCall(MatLUFactorNumeric(outA,inA,info));
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2294 {
2295   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2296   PetscInt    i,nz,mbs;
2297 
2298   PetscFunctionBegin;
2299   nz  = baij->maxnz;
2300   mbs = baij->mbs;
2301   for (i=0; i<nz; i++) {
2302     baij->j[i] = indices[i];
2303   }
2304   baij->nz = nz;
2305   for (i=0; i<mbs; i++) {
2306     baij->ilen[i] = baij->imax[i];
2307   }
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 /*@
2312     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2313        in the matrix.
2314 
2315   Input Parameters:
2316 +  mat - the SeqBAIJ matrix
2317 -  indices - the column indices
2318 
2319   Level: advanced
2320 
2321   Notes:
2322     This can be called if you have precomputed the nonzero structure of the
2323   matrix and want to provide it to the matrix object to improve the performance
2324   of the MatSetValues() operation.
2325 
2326     You MUST have set the correct numbers of nonzeros per row in the call to
2327   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2328 
2329     MUST be called before any calls to MatSetValues();
2330 
2331 @*/
2332 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2333 {
2334   PetscFunctionBegin;
2335   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2336   PetscValidIntPointer(indices,2);
2337   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2342 {
2343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2344   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2345   PetscReal      atmp;
2346   PetscScalar    *x,zero = 0.0;
2347   MatScalar      *aa;
2348   PetscInt       ncols,brow,krow,kcol;
2349 
2350   PetscFunctionBegin;
2351   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2352   PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2353   bs  = A->rmap->bs;
2354   aa  = a->a;
2355   ai  = a->i;
2356   aj  = a->j;
2357   mbs = a->mbs;
2358 
2359   PetscCall(VecSet(v,zero));
2360   PetscCall(VecGetArray(v,&x));
2361   PetscCall(VecGetLocalSize(v,&n));
2362   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2363   for (i=0; i<mbs; i++) {
2364     ncols = ai[1] - ai[0]; ai++;
2365     brow  = bs*i;
2366     for (j=0; j<ncols; j++) {
2367       for (kcol=0; kcol<bs; kcol++) {
2368         for (krow=0; krow<bs; krow++) {
2369           atmp = PetscAbsScalar(*aa);aa++;
2370           row  = brow + krow;   /* row index */
2371           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2372         }
2373       }
2374       aj++;
2375     }
2376   }
2377   PetscCall(VecRestoreArray(v,&x));
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382 {
2383   PetscFunctionBegin;
2384   /* If the two matrices have the same copy implementation, use fast copy. */
2385   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2386     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2387     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2388     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2389 
2390     PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
2391     PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2392     PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs]));
2393     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2394   } else {
2395     PetscCall(MatCopy_Basic(A,B,str));
2396   }
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2401 {
2402   PetscFunctionBegin;
2403   PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL));
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2408 {
2409   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2410 
2411   PetscFunctionBegin;
2412   *array = a->a;
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2417 {
2418   PetscFunctionBegin;
2419   *array = NULL;
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2424 {
2425   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2426   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2427   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2428 
2429   PetscFunctionBegin;
2430   /* Set the number of nonzeros in the new matrix */
2431   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz));
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2436 {
2437   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2438   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2439   PetscBLASInt   one=1;
2440 
2441   PetscFunctionBegin;
2442   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2443     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2444     if (e) {
2445       PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e));
2446       if (e) {
2447         PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e));
2448         if (e) str = SAME_NONZERO_PATTERN;
2449       }
2450     }
2451     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2452   }
2453   if (str == SAME_NONZERO_PATTERN) {
2454     PetscScalar  alpha = a;
2455     PetscBLASInt bnz;
2456     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
2457     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2458     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2459   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2460     PetscCall(MatAXPY_Basic(Y,a,X,str));
2461   } else {
2462     Mat      B;
2463     PetscInt *nnz;
2464     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2465     PetscCall(PetscMalloc1(Y->rmap->N,&nnz));
2466     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2467     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2468     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
2469     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
2470     PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name));
2471     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz));
2472     PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
2473     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2474     PetscCall(MatHeaderMerge(Y,&B));
2475     PetscCall(PetscFree(nnz));
2476   }
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2481 {
2482 #if defined(PETSC_USE_COMPLEX)
2483   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2484   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2485   MatScalar   *aa = a->a;
2486 
2487   PetscFunctionBegin;
2488   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2489 #else
2490   PetscFunctionBegin;
2491 #endif
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2496 {
2497   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2498   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2499   MatScalar   *aa = a->a;
2500 
2501   PetscFunctionBegin;
2502   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2507 {
2508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2509   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2510   MatScalar   *aa = a->a;
2511 
2512   PetscFunctionBegin;
2513   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /*
2518     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2519 */
2520 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2521 {
2522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2523   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2524   PetscInt       nz = a->i[m],row,*jj,mr,col;
2525 
2526   PetscFunctionBegin;
2527   *nn = n;
2528   if (!ia) PetscFunctionReturn(0);
2529   PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2530   PetscCall(PetscCalloc1(n,&collengths));
2531   PetscCall(PetscMalloc1(n+1,&cia));
2532   PetscCall(PetscMalloc1(nz,&cja));
2533   jj   = a->j;
2534   for (i=0; i<nz; i++) {
2535     collengths[jj[i]]++;
2536   }
2537   cia[0] = oshift;
2538   for (i=0; i<n; i++) {
2539     cia[i+1] = cia[i] + collengths[i];
2540   }
2541   PetscCall(PetscArrayzero(collengths,n));
2542   jj   = a->j;
2543   for (row=0; row<m; row++) {
2544     mr = a->i[row+1] - a->i[row];
2545     for (i=0; i<mr; i++) {
2546       col = *jj++;
2547 
2548       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2549     }
2550   }
2551   PetscCall(PetscFree(collengths));
2552   *ia  = cia; *ja = cja;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2557 {
2558   PetscFunctionBegin;
2559   if (!ia) PetscFunctionReturn(0);
2560   PetscCall(PetscFree(*ia));
2561   PetscCall(PetscFree(*ja));
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 /*
2566  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2567  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2568  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2569  */
2570 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2571 {
2572   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2573   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2574   PetscInt       nz = a->i[m],row,*jj,mr,col;
2575   PetscInt       *cspidx;
2576 
2577   PetscFunctionBegin;
2578   *nn = n;
2579   if (!ia) PetscFunctionReturn(0);
2580 
2581   PetscCall(PetscCalloc1(n,&collengths));
2582   PetscCall(PetscMalloc1(n+1,&cia));
2583   PetscCall(PetscMalloc1(nz,&cja));
2584   PetscCall(PetscMalloc1(nz,&cspidx));
2585   jj   = a->j;
2586   for (i=0; i<nz; i++) {
2587     collengths[jj[i]]++;
2588   }
2589   cia[0] = oshift;
2590   for (i=0; i<n; i++) {
2591     cia[i+1] = cia[i] + collengths[i];
2592   }
2593   PetscCall(PetscArrayzero(collengths,n));
2594   jj   = a->j;
2595   for (row=0; row<m; row++) {
2596     mr = a->i[row+1] - a->i[row];
2597     for (i=0; i<mr; i++) {
2598       col = *jj++;
2599       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2600       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2601     }
2602   }
2603   PetscCall(PetscFree(collengths));
2604   *ia    = cia;
2605   *ja    = cja;
2606   *spidx = cspidx;
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2611 {
2612   PetscFunctionBegin;
2613   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done));
2614   PetscCall(PetscFree(*spidx));
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2619 {
2620   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2621 
2622   PetscFunctionBegin;
2623   if (!Y->preallocated || !aij->nz) {
2624     PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL));
2625   }
2626   PetscCall(MatShift_Basic(Y,a));
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 /* -------------------------------------------------------------------*/
2631 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2632                                        MatGetRow_SeqBAIJ,
2633                                        MatRestoreRow_SeqBAIJ,
2634                                        MatMult_SeqBAIJ_N,
2635                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2636                                        MatMultTranspose_SeqBAIJ,
2637                                        MatMultTransposeAdd_SeqBAIJ,
2638                                        NULL,
2639                                        NULL,
2640                                        NULL,
2641                                /* 10*/ NULL,
2642                                        MatLUFactor_SeqBAIJ,
2643                                        NULL,
2644                                        NULL,
2645                                        MatTranspose_SeqBAIJ,
2646                                /* 15*/ MatGetInfo_SeqBAIJ,
2647                                        MatEqual_SeqBAIJ,
2648                                        MatGetDiagonal_SeqBAIJ,
2649                                        MatDiagonalScale_SeqBAIJ,
2650                                        MatNorm_SeqBAIJ,
2651                                /* 20*/ NULL,
2652                                        MatAssemblyEnd_SeqBAIJ,
2653                                        MatSetOption_SeqBAIJ,
2654                                        MatZeroEntries_SeqBAIJ,
2655                                /* 24*/ MatZeroRows_SeqBAIJ,
2656                                        NULL,
2657                                        NULL,
2658                                        NULL,
2659                                        NULL,
2660                                /* 29*/ MatSetUp_SeqBAIJ,
2661                                        NULL,
2662                                        NULL,
2663                                        NULL,
2664                                        NULL,
2665                                /* 34*/ MatDuplicate_SeqBAIJ,
2666                                        NULL,
2667                                        NULL,
2668                                        MatILUFactor_SeqBAIJ,
2669                                        NULL,
2670                                /* 39*/ MatAXPY_SeqBAIJ,
2671                                        MatCreateSubMatrices_SeqBAIJ,
2672                                        MatIncreaseOverlap_SeqBAIJ,
2673                                        MatGetValues_SeqBAIJ,
2674                                        MatCopy_SeqBAIJ,
2675                                /* 44*/ NULL,
2676                                        MatScale_SeqBAIJ,
2677                                        MatShift_SeqBAIJ,
2678                                        NULL,
2679                                        MatZeroRowsColumns_SeqBAIJ,
2680                                /* 49*/ NULL,
2681                                        MatGetRowIJ_SeqBAIJ,
2682                                        MatRestoreRowIJ_SeqBAIJ,
2683                                        MatGetColumnIJ_SeqBAIJ,
2684                                        MatRestoreColumnIJ_SeqBAIJ,
2685                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2686                                        NULL,
2687                                        NULL,
2688                                        NULL,
2689                                        MatSetValuesBlocked_SeqBAIJ,
2690                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2691                                        MatDestroy_SeqBAIJ,
2692                                        MatView_SeqBAIJ,
2693                                        NULL,
2694                                        NULL,
2695                                /* 64*/ NULL,
2696                                        NULL,
2697                                        NULL,
2698                                        NULL,
2699                                        NULL,
2700                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2701                                        NULL,
2702                                        MatConvert_Basic,
2703                                        NULL,
2704                                        NULL,
2705                                /* 74*/ NULL,
2706                                        MatFDColoringApply_BAIJ,
2707                                        NULL,
2708                                        NULL,
2709                                        NULL,
2710                                /* 79*/ NULL,
2711                                        NULL,
2712                                        NULL,
2713                                        NULL,
2714                                        MatLoad_SeqBAIJ,
2715                                /* 84*/ NULL,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                /* 89*/ NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        NULL,
2724                                        NULL,
2725                                /* 94*/ NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        NULL,
2729                                        NULL,
2730                                /* 99*/ NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        MatConjugate_SeqBAIJ,
2734                                        NULL,
2735                                /*104*/ NULL,
2736                                        MatRealPart_SeqBAIJ,
2737                                        MatImaginaryPart_SeqBAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                /*109*/ NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        MatMissingDiagonal_SeqBAIJ,
2745                                /*114*/ NULL,
2746                                        NULL,
2747                                        NULL,
2748                                        NULL,
2749                                        NULL,
2750                                /*119*/ NULL,
2751                                        NULL,
2752                                        MatMultHermitianTranspose_SeqBAIJ,
2753                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2754                                        NULL,
2755                                /*124*/ NULL,
2756                                        MatGetColumnReductions_SeqBAIJ,
2757                                        MatInvertBlockDiagonal_SeqBAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                /*129*/ NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                /*134*/ NULL,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                /*139*/ MatSetBlockSizes_Default,
2771                                        NULL,
2772                                        NULL,
2773                                        MatFDColoringSetUp_SeqXAIJ,
2774                                        NULL,
2775                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2776                                        MatDestroySubMatrices_SeqBAIJ,
2777                                        NULL,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL
2781 };
2782 
2783 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2784 {
2785   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2786   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2787 
2788   PetscFunctionBegin;
2789   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2790 
2791   /* allocate space for values if not already there */
2792   if (!aij->saved_values) {
2793     PetscCall(PetscMalloc1(nz+1,&aij->saved_values));
2794     PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar)));
2795   }
2796 
2797   /* copy values over */
2798   PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz));
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2803 {
2804   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2805   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2806 
2807   PetscFunctionBegin;
2808   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2809   PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2810 
2811   /* copy values over */
2812   PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz));
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2817 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2818 
2819 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2820 {
2821   Mat_SeqBAIJ    *b;
2822   PetscInt       i,mbs,nbs,bs2;
2823   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2824 
2825   PetscFunctionBegin;
2826   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2827   if (nz == MAT_SKIP_ALLOCATION) {
2828     skipallocation = PETSC_TRUE;
2829     nz             = 0;
2830   }
2831 
2832   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2833   PetscCall(PetscLayoutSetUp(B->rmap));
2834   PetscCall(PetscLayoutSetUp(B->cmap));
2835   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2836 
2837   B->preallocated = PETSC_TRUE;
2838 
2839   mbs = B->rmap->n/bs;
2840   nbs = B->cmap->n/bs;
2841   bs2 = bs*bs;
2842 
2843   PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2844 
2845   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2846   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2847   if (nnz) {
2848     for (i=0; i<mbs; i++) {
2849       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
2850       PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2851     }
2852   }
2853 
2854   b    = (Mat_SeqBAIJ*)B->data;
2855   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2856   PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL));
2857   PetscOptionsEnd();
2858 
2859   if (!flg) {
2860     switch (bs) {
2861     case 1:
2862       B->ops->mult    = MatMult_SeqBAIJ_1;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2864       break;
2865     case 2:
2866       B->ops->mult    = MatMult_SeqBAIJ_2;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2868       break;
2869     case 3:
2870       B->ops->mult    = MatMult_SeqBAIJ_3;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2872       break;
2873     case 4:
2874       B->ops->mult    = MatMult_SeqBAIJ_4;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2876       break;
2877     case 5:
2878       B->ops->mult    = MatMult_SeqBAIJ_5;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2880       break;
2881     case 6:
2882       B->ops->mult    = MatMult_SeqBAIJ_6;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2884       break;
2885     case 7:
2886       B->ops->mult    = MatMult_SeqBAIJ_7;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2888       break;
2889     case 9:
2890     {
2891       PetscInt version = 1;
2892       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2893       switch (version) {
2894 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2895       case 1:
2896         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2897         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2898         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2899         break;
2900 #endif
2901       default:
2902         B->ops->mult    = MatMult_SeqBAIJ_N;
2903         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2904         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2905         break;
2906       }
2907       break;
2908     }
2909     case 11:
2910       B->ops->mult    = MatMult_SeqBAIJ_11;
2911       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2912       break;
2913     case 12:
2914     {
2915       PetscInt version = 1;
2916       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2917       switch (version) {
2918       case 1:
2919         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2920         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2921         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2922         break;
2923       case 2:
2924         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2925         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2926         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2927         break;
2928 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2929       case 3:
2930         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2931         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2932         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2933         break;
2934 #endif
2935       default:
2936         B->ops->mult    = MatMult_SeqBAIJ_N;
2937         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2938         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2939         break;
2940       }
2941       break;
2942     }
2943     case 15:
2944     {
2945       PetscInt version = 1;
2946       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2947       switch (version) {
2948       case 1:
2949         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2950         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2951         break;
2952       case 2:
2953         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2954         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2955         break;
2956       case 3:
2957         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2958         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2959         break;
2960       case 4:
2961         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2962         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2963         break;
2964       default:
2965         B->ops->mult    = MatMult_SeqBAIJ_N;
2966         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2967         break;
2968       }
2969       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2970       break;
2971     }
2972     default:
2973       B->ops->mult    = MatMult_SeqBAIJ_N;
2974       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2975       PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2976       break;
2977     }
2978   }
2979   B->ops->sor = MatSOR_SeqBAIJ;
2980   b->mbs = mbs;
2981   b->nbs = nbs;
2982   if (!skipallocation) {
2983     if (!b->imax) {
2984       PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen));
2985       PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt)));
2986 
2987       b->free_imax_ilen = PETSC_TRUE;
2988     }
2989     /* b->ilen will count nonzeros in each block row so far. */
2990     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2991     if (!nnz) {
2992       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2993       else if (nz < 0) nz = 1;
2994       nz = PetscMin(nz,nbs);
2995       for (i=0; i<mbs; i++) b->imax[i] = nz;
2996       PetscCall(PetscIntMultError(nz,mbs,&nz));
2997     } else {
2998       PetscInt64 nz64 = 0;
2999       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3000       PetscCall(PetscIntCast(nz64,&nz));
3001     }
3002 
3003     /* allocate the matrix space */
3004     PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i));
3005     if (B->structure_only) {
3006       PetscCall(PetscMalloc1(nz,&b->j));
3007       PetscCall(PetscMalloc1(B->rmap->N+1,&b->i));
3008       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt)));
3009     } else {
3010       PetscInt nzbs2 = 0;
3011       PetscCall(PetscIntMultError(nz,bs2,&nzbs2));
3012       PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i));
3013       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))));
3014       PetscCall(PetscArrayzero(b->a,nz*bs2));
3015     }
3016     PetscCall(PetscArrayzero(b->j,nz));
3017 
3018     if (B->structure_only) {
3019       b->singlemalloc = PETSC_FALSE;
3020       b->free_a       = PETSC_FALSE;
3021     } else {
3022       b->singlemalloc = PETSC_TRUE;
3023       b->free_a       = PETSC_TRUE;
3024     }
3025     b->free_ij = PETSC_TRUE;
3026 
3027     b->i[0] = 0;
3028     for (i=1; i<mbs+1; i++) {
3029       b->i[i] = b->i[i-1] + b->imax[i-1];
3030     }
3031 
3032   } else {
3033     b->free_a  = PETSC_FALSE;
3034     b->free_ij = PETSC_FALSE;
3035   }
3036 
3037   b->bs2              = bs2;
3038   b->mbs              = mbs;
3039   b->nz               = 0;
3040   b->maxnz            = nz;
3041   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3042   B->was_assembled    = PETSC_FALSE;
3043   B->assembled        = PETSC_FALSE;
3044   if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3049 {
3050   PetscInt       i,m,nz,nz_max=0,*nnz;
3051   PetscScalar    *values=NULL;
3052   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3053 
3054   PetscFunctionBegin;
3055   PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
3056   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
3057   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
3058   PetscCall(PetscLayoutSetUp(B->rmap));
3059   PetscCall(PetscLayoutSetUp(B->cmap));
3060   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
3061   m    = B->rmap->n/bs;
3062 
3063   PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3064   PetscCall(PetscMalloc1(m+1, &nnz));
3065   for (i=0; i<m; i++) {
3066     nz = ii[i+1]- ii[i];
3067     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3068     nz_max = PetscMax(nz_max, nz);
3069     nnz[i] = nz;
3070   }
3071   PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
3072   PetscCall(PetscFree(nnz));
3073 
3074   values = (PetscScalar*)V;
3075   if (!values) {
3076     PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values));
3077   }
3078   for (i=0; i<m; i++) {
3079     PetscInt          ncols  = ii[i+1] - ii[i];
3080     const PetscInt    *icols = jj + ii[i];
3081     if (bs == 1 || !roworiented) {
3082       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3083       PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES));
3084     } else {
3085       PetscInt j;
3086       for (j=0; j<ncols; j++) {
3087         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3088         PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES));
3089       }
3090     }
3091   }
3092   if (!V) PetscCall(PetscFree(values));
3093   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3094   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3095   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 /*@C
3100    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3101 
3102    Not Collective
3103 
3104    Input Parameter:
3105 .  mat - a MATSEQBAIJ matrix
3106 
3107    Output Parameter:
3108 .   array - pointer to the data
3109 
3110    Level: intermediate
3111 
3112 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3113 @*/
3114 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3115 {
3116   PetscFunctionBegin;
3117   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 /*@C
3122    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3123 
3124    Not Collective
3125 
3126    Input Parameters:
3127 +  mat - a MATSEQBAIJ matrix
3128 -  array - pointer to the data
3129 
3130    Level: intermediate
3131 
3132 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3133 @*/
3134 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3135 {
3136   PetscFunctionBegin;
3137   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3138   PetscFunctionReturn(0);
3139 }
3140 
3141 /*MC
3142    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3143    block sparse compressed row format.
3144 
3145    Options Database Keys:
3146 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3147 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3148 
3149    Level: beginner
3150 
3151    Notes:
3152     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3153     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3154 
3155    Run with -info to see what version of the matrix-vector product is being used
3156 
3157 .seealso: `MatCreateSeqBAIJ()`
3158 M*/
3159 
3160 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3161 
3162 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3163 {
3164   PetscMPIInt    size;
3165   Mat_SeqBAIJ    *b;
3166 
3167   PetscFunctionBegin;
3168   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
3169   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3170 
3171   PetscCall(PetscNewLog(B,&b));
3172   B->data = (void*)b;
3173   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
3174 
3175   b->row          = NULL;
3176   b->col          = NULL;
3177   b->icol         = NULL;
3178   b->reallocs     = 0;
3179   b->saved_values = NULL;
3180 
3181   b->roworiented        = PETSC_TRUE;
3182   b->nonew              = 0;
3183   b->diag               = NULL;
3184   B->spptr              = NULL;
3185   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3186   b->keepnonzeropattern = PETSC_FALSE;
3187 
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ));
3198 #if defined(PETSC_HAVE_HYPRE)
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE));
3200 #endif
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS));
3202   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ));
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3207 {
3208   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3209   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3210 
3211   PetscFunctionBegin;
3212   PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3213 
3214   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3215     c->imax           = a->imax;
3216     c->ilen           = a->ilen;
3217     c->free_imax_ilen = PETSC_FALSE;
3218   } else {
3219     PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen));
3220     PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt)));
3221     for (i=0; i<mbs; i++) {
3222       c->imax[i] = a->imax[i];
3223       c->ilen[i] = a->ilen[i];
3224     }
3225     c->free_imax_ilen = PETSC_TRUE;
3226   }
3227 
3228   /* allocate the matrix space */
3229   if (mallocmatspace) {
3230     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3231       PetscCall(PetscCalloc1(bs2*nz,&c->a));
3232       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar)));
3233 
3234       c->i            = a->i;
3235       c->j            = a->j;
3236       c->singlemalloc = PETSC_FALSE;
3237       c->free_a       = PETSC_TRUE;
3238       c->free_ij      = PETSC_FALSE;
3239       c->parent       = A;
3240       C->preallocated = PETSC_TRUE;
3241       C->assembled    = PETSC_TRUE;
3242 
3243       PetscCall(PetscObjectReference((PetscObject)A));
3244       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3245       PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3246     } else {
3247       PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i));
3248       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt)));
3249 
3250       c->singlemalloc = PETSC_TRUE;
3251       c->free_a       = PETSC_TRUE;
3252       c->free_ij      = PETSC_TRUE;
3253 
3254       PetscCall(PetscArraycpy(c->i,a->i,mbs+1));
3255       if (mbs > 0) {
3256         PetscCall(PetscArraycpy(c->j,a->j,nz));
3257         if (cpvalues == MAT_COPY_VALUES) {
3258           PetscCall(PetscArraycpy(c->a,a->a,bs2*nz));
3259         } else {
3260           PetscCall(PetscArrayzero(c->a,bs2*nz));
3261         }
3262       }
3263       C->preallocated = PETSC_TRUE;
3264       C->assembled    = PETSC_TRUE;
3265     }
3266   }
3267 
3268   c->roworiented = a->roworiented;
3269   c->nonew       = a->nonew;
3270 
3271   PetscCall(PetscLayoutReference(A->rmap,&C->rmap));
3272   PetscCall(PetscLayoutReference(A->cmap,&C->cmap));
3273 
3274   c->bs2         = a->bs2;
3275   c->mbs         = a->mbs;
3276   c->nbs         = a->nbs;
3277 
3278   if (a->diag) {
3279     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3280       c->diag      = a->diag;
3281       c->free_diag = PETSC_FALSE;
3282     } else {
3283       PetscCall(PetscMalloc1(mbs+1,&c->diag));
3284       PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt)));
3285       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3286       c->free_diag = PETSC_TRUE;
3287     }
3288   } else c->diag = NULL;
3289 
3290   c->nz         = a->nz;
3291   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3292   c->solve_work = NULL;
3293   c->mult_work  = NULL;
3294   c->sor_workt  = NULL;
3295   c->sor_work   = NULL;
3296 
3297   c->compressedrow.use   = a->compressedrow.use;
3298   c->compressedrow.nrows = a->compressedrow.nrows;
3299   if (a->compressedrow.use) {
3300     i    = a->compressedrow.nrows;
3301     PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex));
3302     PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt)));
3303     PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1));
3304     PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i));
3305   } else {
3306     c->compressedrow.use    = PETSC_FALSE;
3307     c->compressedrow.i      = NULL;
3308     c->compressedrow.rindex = NULL;
3309   }
3310   C->nonzerostate = A->nonzerostate;
3311 
3312   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist));
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3317 {
3318   PetscFunctionBegin;
3319   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
3320   PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n));
3321   PetscCall(MatSetType(*B,MATSEQBAIJ));
3322   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE));
3323   PetscFunctionReturn(0);
3324 }
3325 
3326 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3327 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3328 {
3329   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3330   PetscInt       *rowidxs,*colidxs;
3331   PetscScalar    *matvals;
3332 
3333   PetscFunctionBegin;
3334   PetscCall(PetscViewerSetUp(viewer));
3335 
3336   /* read matrix header */
3337   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3338   PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3339   M = header[1]; N = header[2]; nz = header[3];
3340   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3341   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3342   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3343 
3344   /* set block sizes from the viewer's .info file */
3345   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3346   /* set local and global sizes if not set already */
3347   if (mat->rmap->n < 0) mat->rmap->n = M;
3348   if (mat->cmap->n < 0) mat->cmap->n = N;
3349   if (mat->rmap->N < 0) mat->rmap->N = M;
3350   if (mat->cmap->N < 0) mat->cmap->N = N;
3351   PetscCall(PetscLayoutSetUp(mat->rmap));
3352   PetscCall(PetscLayoutSetUp(mat->cmap));
3353 
3354   /* check if the matrix sizes are correct */
3355   PetscCall(MatGetSize(mat,&rows,&cols));
3356   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3357   PetscCall(MatGetBlockSize(mat,&bs));
3358   PetscCall(MatGetLocalSize(mat,&m,&n));
3359   mbs = m/bs; nbs = n/bs;
3360 
3361   /* read in row lengths, column indices and nonzero values */
3362   PetscCall(PetscMalloc1(m+1,&rowidxs));
3363   PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT));
3364   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3365   sum = rowidxs[m];
3366   PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3367 
3368   /* read in column indices and nonzero values */
3369   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals));
3370   PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT));
3371   PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR));
3372 
3373   { /* preallocate matrix storage */
3374     PetscBT   bt; /* helper bit set to count nonzeros */
3375     PetscInt  *nnz;
3376     PetscBool sbaij;
3377 
3378     PetscCall(PetscBTCreate(nbs,&bt));
3379     PetscCall(PetscCalloc1(mbs,&nnz));
3380     PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij));
3381     for (i=0; i<mbs; i++) {
3382       PetscCall(PetscBTMemzero(nbs,bt));
3383       for (k=0; k<bs; k++) {
3384         PetscInt row = bs*i + k;
3385         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3386           PetscInt col = colidxs[j];
3387           if (!sbaij || col >= row)
3388             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3389         }
3390       }
3391     }
3392     PetscCall(PetscBTDestroy(&bt));
3393     PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz));
3394     PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz));
3395     PetscCall(PetscFree(nnz));
3396   }
3397 
3398   /* store matrix values */
3399   for (i=0; i<m; i++) {
3400     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3401     PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
3402   }
3403 
3404   PetscCall(PetscFree(rowidxs));
3405   PetscCall(PetscFree2(colidxs,matvals));
3406   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
3407   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
3408   PetscFunctionReturn(0);
3409 }
3410 
3411 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3412 {
3413   PetscBool isbinary;
3414 
3415   PetscFunctionBegin;
3416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3417   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3418   PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer));
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 /*@C
3423    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3424    compressed row) format.  For good matrix assembly performance the
3425    user should preallocate the matrix storage by setting the parameter nz
3426    (or the array nnz).  By setting these parameters accurately, performance
3427    during matrix assembly can be increased by more than a factor of 50.
3428 
3429    Collective
3430 
3431    Input Parameters:
3432 +  comm - MPI communicator, set to PETSC_COMM_SELF
3433 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3434           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3435 .  m - number of rows
3436 .  n - number of columns
3437 .  nz - number of nonzero blocks  per block row (same for all rows)
3438 -  nnz - array containing the number of nonzero blocks in the various block rows
3439          (possibly different for each block row) or NULL
3440 
3441    Output Parameter:
3442 .  A - the matrix
3443 
3444    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3445    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3446    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3447 
3448    Options Database Keys:
3449 +   -mat_no_unroll - uses code that does not unroll the loops in the
3450                      block calculations (much slower)
3451 -    -mat_block_size - size of the blocks to use
3452 
3453    Level: intermediate
3454 
3455    Notes:
3456    The number of rows and columns must be divisible by blocksize.
3457 
3458    If the nnz parameter is given then the nz parameter is ignored
3459 
3460    A nonzero block is any block that as 1 or more nonzeros in it
3461 
3462    The block AIJ format is fully compatible with standard Fortran 77
3463    storage.  That is, the stored row and column indices can begin at
3464    either one (as in Fortran) or zero.  See the users' manual for details.
3465 
3466    Specify the preallocated storage with either nz or nnz (not both).
3467    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3468    allocation.  See Users-Manual: ch_mat for details.
3469    matrices.
3470 
3471 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3472 @*/
3473 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3474 {
3475   PetscFunctionBegin;
3476   PetscCall(MatCreate(comm,A));
3477   PetscCall(MatSetSizes(*A,m,n,m,n));
3478   PetscCall(MatSetType(*A,MATSEQBAIJ));
3479   PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz));
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 /*@C
3484    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3485    per row in the matrix. For good matrix assembly performance the
3486    user should preallocate the matrix storage by setting the parameter nz
3487    (or the array nnz).  By setting these parameters accurately, performance
3488    during matrix assembly can be increased by more than a factor of 50.
3489 
3490    Collective
3491 
3492    Input Parameters:
3493 +  B - the matrix
3494 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3495           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3496 .  nz - number of block nonzeros per block row (same for all rows)
3497 -  nnz - array containing the number of block nonzeros in the various block rows
3498          (possibly different for each block row) or NULL
3499 
3500    Options Database Keys:
3501 +   -mat_no_unroll - uses code that does not unroll the loops in the
3502                      block calculations (much slower)
3503 -   -mat_block_size - size of the blocks to use
3504 
3505    Level: intermediate
3506 
3507    Notes:
3508    If the nnz parameter is given then the nz parameter is ignored
3509 
3510    You can call MatGetInfo() to get information on how effective the preallocation was;
3511    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3512    You can also run with the option -info and look for messages with the string
3513    malloc in them to see if additional memory allocation was needed.
3514 
3515    The block AIJ format is fully compatible with standard Fortran 77
3516    storage.  That is, the stored row and column indices can begin at
3517    either one (as in Fortran) or zero.  See the users' manual for details.
3518 
3519    Specify the preallocated storage with either nz or nnz (not both).
3520    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3521    allocation.  See Users-Manual: ch_mat for details.
3522 
3523 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3524 @*/
3525 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3526 {
3527   PetscFunctionBegin;
3528   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3529   PetscValidType(B,1);
3530   PetscValidLogicalCollectiveInt(B,bs,2);
3531   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 /*@C
3536    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3537 
3538    Collective
3539 
3540    Input Parameters:
3541 +  B - the matrix
3542 .  i - the indices into j for the start of each local row (starts with zero)
3543 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3544 -  v - optional values in the matrix
3545 
3546    Level: advanced
3547 
3548    Notes:
3549    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3550    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3551    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3552    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3553    block column and the second index is over columns within a block.
3554 
3555    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3556 
3557 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3558 @*/
3559 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3560 {
3561   PetscFunctionBegin;
3562   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3563   PetscValidType(B,1);
3564   PetscValidLogicalCollectiveInt(B,bs,2);
3565   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 /*@
3570      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3571 
3572      Collective
3573 
3574    Input Parameters:
3575 +  comm - must be an MPI communicator of size 1
3576 .  bs - size of block
3577 .  m - number of rows
3578 .  n - number of columns
3579 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3580 .  j - column indices
3581 -  a - matrix values
3582 
3583    Output Parameter:
3584 .  mat - the matrix
3585 
3586    Level: advanced
3587 
3588    Notes:
3589        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3590     once the matrix is destroyed
3591 
3592        You cannot set new nonzero locations into this matrix, that will generate an error.
3593 
3594        The i and j indices are 0 based
3595 
3596        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3597 
3598       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3599       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3600       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3601       with column-major ordering within blocks.
3602 
3603 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3604 
3605 @*/
3606 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3607 {
3608   PetscInt       ii;
3609   Mat_SeqBAIJ    *baij;
3610 
3611   PetscFunctionBegin;
3612   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
3613   if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3614 
3615   PetscCall(MatCreate(comm,mat));
3616   PetscCall(MatSetSizes(*mat,m,n,m,n));
3617   PetscCall(MatSetType(*mat,MATSEQBAIJ));
3618   PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL));
3619   baij = (Mat_SeqBAIJ*)(*mat)->data;
3620   PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen));
3621   PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt)));
3622 
3623   baij->i = i;
3624   baij->j = j;
3625   baij->a = a;
3626 
3627   baij->singlemalloc = PETSC_FALSE;
3628   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3629   baij->free_a       = PETSC_FALSE;
3630   baij->free_ij      = PETSC_FALSE;
3631 
3632   for (ii=0; ii<m; ii++) {
3633     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3634     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3635   }
3636   if (PetscDefined(USE_DEBUG)) {
3637     for (ii=0; ii<baij->i[m]; ii++) {
3638       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3639       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3640     }
3641   }
3642 
3643   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3644   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3645   PetscFunctionReturn(0);
3646 }
3647 
3648 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3649 {
3650   PetscMPIInt    size;
3651 
3652   PetscFunctionBegin;
3653   PetscCallMPI(MPI_Comm_size(comm,&size));
3654   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3655     PetscCall(MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN));
3656   } else {
3657     PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat));
3658   }
3659   PetscFunctionReturn(0);
3660 }
3661