xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 421480d92be24cdb9933c60510b8e175c0a8d034)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33 
34 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35 {
36   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
38   MatScalar   *a_val = a_aij->a;
39 
40   PetscFunctionBegin;
41   PetscCall(MatGetSize(A, &m, &n));
42   PetscCall(PetscArrayzero(reductions, n));
43   if (type == NORM_2) {
44     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
45       for (jb = 0; jb < bs; jb++) {
46         for (ib = 0; ib < bs; ib++) {
47           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
48           a_val++;
49         }
50       }
51     }
52   } else if (type == NORM_1) {
53     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
54       for (jb = 0; jb < bs; jb++) {
55         for (ib = 0; ib < bs; ib++) {
56           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
57           a_val++;
58         }
59       }
60     }
61   } else if (type == NORM_INFINITY) {
62     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
63       for (jb = 0; jb < bs; jb++) {
64         for (ib = 0; ib < bs; ib++) {
65           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
66           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
67           a_val++;
68         }
69       }
70     }
71   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73       for (jb = 0; jb < bs; jb++) {
74         for (ib = 0; ib < bs; ib++) {
75           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76           a_val++;
77         }
78       }
79     }
80   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82       for (jb = 0; jb < bs; jb++) {
83         for (ib = 0; ib < bs; ib++) {
84           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85           a_val++;
86         }
87       }
88     }
89   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
90   if (type == NORM_2) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
94   }
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99 {
100   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data;
101   PetscInt        i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
102   MatScalar      *v     = a->a, *odiag, *diag, work[25], *v_work;
103   PetscReal       shift = 0.0;
104   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
105   const PetscInt *adiag;
106 
107   PetscFunctionBegin;
108   allowzeropivot = PetscNot(A->erroriffailure);
109 
110   if (a->idiagvalid) {
111     if (values) *values = a->idiag;
112     PetscFunctionReturn(PETSC_SUCCESS);
113   }
114   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
115   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116   diag = a->idiag;
117   if (values) *values = a->idiag;
118   /* factor and invert each block */
119   switch (bs) {
120   case 1:
121     for (i = 0; i < mbs; i++) {
122       odiag   = v + 1 * adiag[i];
123       diag[0] = odiag[0];
124 
125       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
127         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
129         A->factorerror_zeropivot_row   = i;
130         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * adiag[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * adiag[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * adiag[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * adiag[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * adiag[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * adiag[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * adiag[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414 {
1415   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1416   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1417   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1418 
1419   PetscFunctionBegin;
1420   *nn = n;
1421   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1422   if (symmetric) {
1423     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424     nz = tia[n];
1425   } else {
1426     tia = a->i;
1427     tja = a->j;
1428   }
1429 
1430   if (!blockcompressed && bs > 1) {
1431     (*nn) *= bs;
1432     /* malloc & create the natural set of indices */
1433     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1434     if (n) {
1435       (*ia)[0] = oshift;
1436       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1437     }
1438 
1439     for (i = 1; i < n; i++) {
1440       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1442     }
1443     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444 
1445     if (inja) {
1446       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1447       cnt = 0;
1448       for (i = 0; i < n; i++) {
1449         for (j = 0; j < bs; j++) {
1450           for (k = tia[i]; k < tia[i + 1]; k++) {
1451             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1452           }
1453         }
1454       }
1455     }
1456 
1457     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1458       PetscCall(PetscFree(tia));
1459       PetscCall(PetscFree(tja));
1460     }
1461   } else if (oshift == 1) {
1462     if (symmetric) {
1463       nz = tia[A->rmap->n / bs];
1464       /*  add 1 to i and j indices */
1465       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466       *ia = tia;
1467       if (ja) {
1468         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469         *ja = tja;
1470       }
1471     } else {
1472       nz = a->i[A->rmap->n / bs];
1473       /* malloc space and  add 1 to i and j indices */
1474       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476       if (ja) {
1477         PetscCall(PetscMalloc1(nz, ja));
1478         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479       }
1480     }
1481   } else {
1482     *ia = tia;
1483     if (ja) *ja = tja;
1484   }
1485   PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487 
1488 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489 {
1490   PetscFunctionBegin;
1491   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1493     PetscCall(PetscFree(*ia));
1494     if (ja) PetscCall(PetscFree(*ja));
1495   }
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500 {
1501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1502 
1503   PetscFunctionBegin;
1504   if (A->hash_active) {
1505     PetscInt bs;
1506     A->ops[0] = a->cops;
1507     PetscCall(PetscHMapIJVDestroy(&a->ht));
1508     PetscCall(MatGetBlockSize(A, &bs));
1509     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510     PetscCall(PetscFree(a->dnz));
1511     PetscCall(PetscFree(a->bdnz));
1512     A->hash_active = PETSC_FALSE;
1513   }
1514   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1515   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1516   PetscCall(ISDestroy(&a->row));
1517   PetscCall(ISDestroy(&a->col));
1518   PetscCall(PetscFree(a->diag));
1519   PetscCall(PetscFree(a->idiag));
1520   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1521   PetscCall(PetscFree(a->solve_work));
1522   PetscCall(PetscFree(a->mult_work));
1523   PetscCall(PetscFree(a->sor_workt));
1524   PetscCall(PetscFree(a->sor_work));
1525   PetscCall(ISDestroy(&a->icol));
1526   PetscCall(PetscFree(a->saved_values));
1527   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528 
1529   PetscCall(MatDestroy(&a->sbaijMat));
1530   PetscCall(MatDestroy(&a->parent));
1531   PetscCall(PetscFree(A->data));
1532 
1533   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1534   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1535   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1536   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1537   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1538   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1539   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1540   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1541   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1542   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1543   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1544   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1545 #if defined(PETSC_HAVE_HYPRE)
1546   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1547 #endif
1548   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1549   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552 
1553 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554 {
1555   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1556 
1557   PetscFunctionBegin;
1558   switch (op) {
1559   case MAT_ROW_ORIENTED:
1560     a->roworiented = flg;
1561     break;
1562   case MAT_KEEP_NONZERO_PATTERN:
1563     a->keepnonzeropattern = flg;
1564     break;
1565   case MAT_NEW_NONZERO_LOCATIONS:
1566     a->nonew = (flg ? 0 : 1);
1567     break;
1568   case MAT_NEW_NONZERO_LOCATION_ERR:
1569     a->nonew = (flg ? -1 : 0);
1570     break;
1571   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572     a->nonew = (flg ? -2 : 0);
1573     break;
1574   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575     a->nounused = (flg ? -1 : 0);
1576     break;
1577   default:
1578     break;
1579   }
1580   PetscFunctionReturn(PETSC_SUCCESS);
1581 }
1582 
1583 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1584 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585 {
1586   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1587   MatScalar   *aa_i;
1588   PetscScalar *v_i;
1589 
1590   PetscFunctionBegin;
1591   bs  = A->rmap->bs;
1592   bs2 = bs * bs;
1593   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1594 
1595   bn  = row / bs; /* Block number */
1596   bp  = row % bs; /* Block Position */
1597   M   = ai[bn + 1] - ai[bn];
1598   *nz = bs * M;
1599 
1600   if (v) {
1601     *v = NULL;
1602     if (*nz) {
1603       PetscCall(PetscMalloc1(*nz, v));
1604       for (i = 0; i < M; i++) { /* for each block in the block row */
1605         v_i  = *v + i * bs;
1606         aa_i = aa + bs2 * (ai[bn] + i);
1607         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1608       }
1609     }
1610   }
1611 
1612   if (idx) {
1613     *idx = NULL;
1614     if (*nz) {
1615       PetscCall(PetscMalloc1(*nz, idx));
1616       for (i = 0; i < M; i++) { /* for each block in the block row */
1617         idx_i = *idx + i * bs;
1618         itmp  = bs * aj[ai[bn] + i];
1619         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1620       }
1621     }
1622   }
1623   PetscFunctionReturn(PETSC_SUCCESS);
1624 }
1625 
1626 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627 {
1628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1629 
1630   PetscFunctionBegin;
1631   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636 {
1637   PetscFunctionBegin;
1638   if (idx) PetscCall(PetscFree(*idx));
1639   if (v) PetscCall(PetscFree(*v));
1640   PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642 
1643 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644 {
1645   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1646   Mat          C;
1647   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1648   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1649   MatScalar   *ata, *aa = a->a;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1653   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1655     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1656 
1657     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1658     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1659     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1660     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1661 
1662     at  = (Mat_SeqBAIJ *)C->data;
1663     ati = at->i;
1664     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665   } else {
1666     C   = *B;
1667     at  = (Mat_SeqBAIJ *)C->data;
1668     ati = at->i;
1669   }
1670 
1671   atj = at->j;
1672   ata = at->a;
1673 
1674   /* Copy ati into atfill so we have locations of the next free space in atj */
1675   PetscCall(PetscArraycpy(atfill, ati, nbs));
1676 
1677   /* Walk through A row-wise and mark nonzero entries of A^T. */
1678   for (i = 0; i < mbs; i++) {
1679     anzj = ai[i + 1] - ai[i];
1680     for (j = 0; j < anzj; j++) {
1681       atj[atfill[*aj]] = i;
1682       for (kr = 0; kr < bs; kr++) {
1683         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1684       }
1685       atfill[*aj++] += 1;
1686     }
1687   }
1688   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1689   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1690 
1691   /* Clean up temporary space and complete requests. */
1692   PetscCall(PetscFree(atfill));
1693 
1694   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1695     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1696     *B = C;
1697   } else {
1698     PetscCall(MatHeaderMerge(A, &C));
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1704 {
1705   Mat Btrans;
1706 
1707   PetscFunctionBegin;
1708   *f = PETSC_FALSE;
1709   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1710   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1711   PetscCall(MatDestroy(&Btrans));
1712   PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714 
1715 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1716 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1717 {
1718   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1719   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1720   PetscInt    *rowlens, *colidxs;
1721   PetscScalar *matvals;
1722 
1723   PetscFunctionBegin;
1724   PetscCall(PetscViewerSetUp(viewer));
1725 
1726   M  = mat->rmap->N;
1727   N  = mat->cmap->N;
1728   m  = mat->rmap->n;
1729   bs = mat->rmap->bs;
1730   nz = bs * bs * A->nz;
1731 
1732   /* write matrix header */
1733   header[0] = MAT_FILE_CLASSID;
1734   header[1] = M;
1735   header[2] = N;
1736   header[3] = nz;
1737   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1738 
1739   /* store row lengths */
1740   PetscCall(PetscMalloc1(m, &rowlens));
1741   for (cnt = 0, i = 0; i < A->mbs; i++)
1742     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1743   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1744   PetscCall(PetscFree(rowlens));
1745 
1746   /* store column indices  */
1747   PetscCall(PetscMalloc1(nz, &colidxs));
1748   for (cnt = 0, i = 0; i < A->mbs; i++)
1749     for (k = 0; k < bs; k++)
1750       for (j = A->i[i]; j < A->i[i + 1]; j++)
1751         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1752   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1753   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1754   PetscCall(PetscFree(colidxs));
1755 
1756   /* store nonzero values */
1757   PetscCall(PetscMalloc1(nz, &matvals));
1758   for (cnt = 0, i = 0; i < A->mbs; i++)
1759     for (k = 0; k < bs; k++)
1760       for (j = A->i[i]; j < A->i[i + 1]; j++)
1761         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1762   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1763   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1764   PetscCall(PetscFree(matvals));
1765 
1766   /* write block size option to the viewer's .info file */
1767   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1772 {
1773   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1774   PetscInt     i, bs = A->rmap->bs, k;
1775 
1776   PetscFunctionBegin;
1777   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1778   for (i = 0; i < a->mbs; i++) {
1779     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1780     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1781     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1782   }
1783   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1784   PetscFunctionReturn(PETSC_SUCCESS);
1785 }
1786 
1787 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1788 {
1789   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1790   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1791   PetscViewerFormat format;
1792 
1793   PetscFunctionBegin;
1794   if (A->structure_only) {
1795     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1796     PetscFunctionReturn(PETSC_SUCCESS);
1797   }
1798 
1799   PetscCall(PetscViewerGetFormat(viewer, &format));
1800   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1801     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1802   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1803     const char *matname;
1804     Mat         aij;
1805     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1806     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1807     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1808     PetscCall(MatView(aij, viewer));
1809     PetscCall(MatDestroy(&aij));
1810   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1811     PetscFunctionReturn(PETSC_SUCCESS);
1812   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1813     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1814     for (i = 0; i < a->mbs; i++) {
1815       for (j = 0; j < bs; j++) {
1816         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1817         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1818           for (l = 0; l < bs; l++) {
1819 #if defined(PETSC_USE_COMPLEX)
1820             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1821               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1822             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1823               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1824             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1825               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1826             }
1827 #else
1828             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1829 #endif
1830           }
1831         }
1832         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1833       }
1834     }
1835     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1836   } else {
1837     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1838     for (i = 0; i < a->mbs; i++) {
1839       for (j = 0; j < bs; j++) {
1840         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1841         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1842           for (l = 0; l < bs; l++) {
1843 #if defined(PETSC_USE_COMPLEX)
1844             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1845               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1846             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1847               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1848             } else {
1849               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1850             }
1851 #else
1852             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1853 #endif
1854           }
1855         }
1856         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1857       }
1858     }
1859     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1860   }
1861   PetscCall(PetscViewerFlush(viewer));
1862   PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864 
1865 #include <petscdraw.h>
1866 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1867 {
1868   Mat               A = (Mat)Aa;
1869   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1870   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1871   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1872   MatScalar        *aa;
1873   PetscViewer       viewer;
1874   PetscViewerFormat format;
1875   int               color;
1876 
1877   PetscFunctionBegin;
1878   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1879   PetscCall(PetscViewerGetFormat(viewer, &format));
1880   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1881 
1882   /* loop over matrix elements drawing boxes */
1883 
1884   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1885     PetscDrawCollectiveBegin(draw);
1886     /* Blue for negative, Cyan for zero and  Red for positive */
1887     color = PETSC_DRAW_BLUE;
1888     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1889       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1890         y_l = A->rmap->N - row - 1.0;
1891         y_r = y_l + 1.0;
1892         x_l = a->j[j] * bs;
1893         x_r = x_l + 1.0;
1894         aa  = a->a + j * bs2;
1895         for (k = 0; k < bs; k++) {
1896           for (l = 0; l < bs; l++) {
1897             if (PetscRealPart(*aa++) >= 0.) continue;
1898             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1899           }
1900         }
1901       }
1902     }
1903     color = PETSC_DRAW_CYAN;
1904     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1905       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1906         y_l = A->rmap->N - row - 1.0;
1907         y_r = y_l + 1.0;
1908         x_l = a->j[j] * bs;
1909         x_r = x_l + 1.0;
1910         aa  = a->a + j * bs2;
1911         for (k = 0; k < bs; k++) {
1912           for (l = 0; l < bs; l++) {
1913             if (PetscRealPart(*aa++) != 0.) continue;
1914             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1915           }
1916         }
1917       }
1918     }
1919     color = PETSC_DRAW_RED;
1920     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1921       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1922         y_l = A->rmap->N - row - 1.0;
1923         y_r = y_l + 1.0;
1924         x_l = a->j[j] * bs;
1925         x_r = x_l + 1.0;
1926         aa  = a->a + j * bs2;
1927         for (k = 0; k < bs; k++) {
1928           for (l = 0; l < bs; l++) {
1929             if (PetscRealPart(*aa++) <= 0.) continue;
1930             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1931           }
1932         }
1933       }
1934     }
1935     PetscDrawCollectiveEnd(draw);
1936   } else {
1937     /* use contour shading to indicate magnitude of values */
1938     /* first determine max of all nonzero values */
1939     PetscReal minv = 0.0, maxv = 0.0;
1940     PetscDraw popup;
1941 
1942     for (i = 0; i < a->nz * a->bs2; i++) {
1943       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1944     }
1945     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1946     PetscCall(PetscDrawGetPopup(draw, &popup));
1947     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1948 
1949     PetscDrawCollectiveBegin(draw);
1950     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1951       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1952         y_l = A->rmap->N - row - 1.0;
1953         y_r = y_l + 1.0;
1954         x_l = a->j[j] * bs;
1955         x_r = x_l + 1.0;
1956         aa  = a->a + j * bs2;
1957         for (k = 0; k < bs; k++) {
1958           for (l = 0; l < bs; l++) {
1959             MatScalar v = *aa++;
1960             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1961             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1962           }
1963         }
1964       }
1965     }
1966     PetscDrawCollectiveEnd(draw);
1967   }
1968   PetscFunctionReturn(PETSC_SUCCESS);
1969 }
1970 
1971 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
1972 {
1973   PetscReal xl, yl, xr, yr, w, h;
1974   PetscDraw draw;
1975   PetscBool isnull;
1976 
1977   PetscFunctionBegin;
1978   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1979   PetscCall(PetscDrawIsNull(draw, &isnull));
1980   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1981 
1982   xr = A->cmap->n;
1983   yr = A->rmap->N;
1984   h  = yr / 10.0;
1985   w  = xr / 10.0;
1986   xr += w;
1987   yr += h;
1988   xl = -w;
1989   yl = -h;
1990   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
1991   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
1992   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
1993   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
1994   PetscCall(PetscDrawSave(draw));
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
1999 {
2000   PetscBool isascii, isbinary, isdraw;
2001 
2002   PetscFunctionBegin;
2003   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2004   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2005   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2006   if (isascii) {
2007     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2008   } else if (isbinary) {
2009     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2010   } else if (isdraw) {
2011     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2012   } else {
2013     Mat B;
2014     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2015     PetscCall(MatView(B, viewer));
2016     PetscCall(MatDestroy(&B));
2017   }
2018   PetscFunctionReturn(PETSC_SUCCESS);
2019 }
2020 
2021 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2022 {
2023   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2024   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2025   PetscInt    *ai = a->i, *ailen = a->ilen;
2026   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2027   MatScalar   *ap, *aa = a->a;
2028 
2029   PetscFunctionBegin;
2030   for (k = 0; k < m; k++) { /* loop over rows */
2031     row  = im[k];
2032     brow = row / bs;
2033     if (row < 0) {
2034       v += n;
2035       continue;
2036     } /* negative row */
2037     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2038     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2039     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2040     nrow = ailen[brow];
2041     for (l = 0; l < n; l++) { /* loop over columns */
2042       if (in[l] < 0) {
2043         v++;
2044         continue;
2045       } /* negative column */
2046       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2047       col  = in[l];
2048       bcol = col / bs;
2049       cidx = col % bs;
2050       ridx = row % bs;
2051       high = nrow;
2052       low  = 0; /* assume unsorted */
2053       while (high - low > 5) {
2054         t = (low + high) / 2;
2055         if (rp[t] > bcol) high = t;
2056         else low = t;
2057       }
2058       for (i = low; i < high; i++) {
2059         if (rp[i] > bcol) break;
2060         if (rp[i] == bcol) {
2061           *v++ = ap[bs2 * i + bs * cidx + ridx];
2062           goto finished;
2063         }
2064       }
2065       *v++ = 0.0;
2066     finished:;
2067     }
2068   }
2069   PetscFunctionReturn(PETSC_SUCCESS);
2070 }
2071 
2072 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2073 {
2074   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2075   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2076   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2077   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2078   PetscBool          roworiented = a->roworiented;
2079   const PetscScalar *value       = v;
2080   MatScalar         *ap = NULL, *aa = a->a, *bap;
2081 
2082   PetscFunctionBegin;
2083   if (roworiented) {
2084     stepval = (n - 1) * bs;
2085   } else {
2086     stepval = (m - 1) * bs;
2087   }
2088   for (k = 0; k < m; k++) { /* loop over added rows */
2089     row = im[k];
2090     if (row < 0) continue;
2091     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2092     rp = aj + ai[row];
2093     if (!A->structure_only) ap = aa + bs2 * ai[row];
2094     rmax = imax[row];
2095     nrow = ailen[row];
2096     low  = 0;
2097     high = nrow;
2098     for (l = 0; l < n; l++) { /* loop over added columns */
2099       if (in[l] < 0) continue;
2100       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2101       col = in[l];
2102       if (!A->structure_only) {
2103         if (roworiented) {
2104           value = v + (k * (stepval + bs) + l) * bs;
2105         } else {
2106           value = v + (l * (stepval + bs) + k) * bs;
2107         }
2108       }
2109       if (col <= lastcol) low = 0;
2110       else high = nrow;
2111       lastcol = col;
2112       while (high - low > 7) {
2113         t = (low + high) / 2;
2114         if (rp[t] > col) high = t;
2115         else low = t;
2116       }
2117       for (i = low; i < high; i++) {
2118         if (rp[i] > col) break;
2119         if (rp[i] == col) {
2120           if (A->structure_only) goto noinsert2;
2121           bap = ap + bs2 * i;
2122           if (roworiented) {
2123             if (is == ADD_VALUES) {
2124               for (ii = 0; ii < bs; ii++, value += stepval) {
2125                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2126               }
2127             } else {
2128               for (ii = 0; ii < bs; ii++, value += stepval) {
2129                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2130               }
2131             }
2132           } else {
2133             if (is == ADD_VALUES) {
2134               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2135                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2136                 bap += bs;
2137               }
2138             } else {
2139               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2140                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2141                 bap += bs;
2142               }
2143             }
2144           }
2145           goto noinsert2;
2146         }
2147       }
2148       if (nonew == 1) goto noinsert2;
2149       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2150       if (A->structure_only) {
2151         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2152       } else {
2153         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2154       }
2155       N = nrow++ - 1;
2156       high++;
2157       /* shift up all the later entries in this row */
2158       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2159       rp[i] = col;
2160       if (!A->structure_only) {
2161         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2162         bap = ap + bs2 * i;
2163         if (roworiented) {
2164           for (ii = 0; ii < bs; ii++, value += stepval) {
2165             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2166           }
2167         } else {
2168           for (ii = 0; ii < bs; ii++, value += stepval) {
2169             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2170           }
2171         }
2172       }
2173     noinsert2:;
2174       low = i;
2175     }
2176     ailen[row] = nrow;
2177   }
2178   PetscFunctionReturn(PETSC_SUCCESS);
2179 }
2180 
2181 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2182 {
2183   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2184   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2185   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2186   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2187   MatScalar   *aa    = a->a, *ap;
2188   PetscReal    ratio = 0.6;
2189 
2190   PetscFunctionBegin;
2191   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2192 
2193   if (m) rmax = ailen[0];
2194   for (i = 1; i < mbs; i++) {
2195     /* move each row back by the amount of empty slots (fshift) before it*/
2196     fshift += imax[i - 1] - ailen[i - 1];
2197     rmax = PetscMax(rmax, ailen[i]);
2198     if (fshift) {
2199       ip = aj + ai[i];
2200       ap = aa + bs2 * ai[i];
2201       N  = ailen[i];
2202       PetscCall(PetscArraymove(ip - fshift, ip, N));
2203       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2204     }
2205     ai[i] = ai[i - 1] + ailen[i - 1];
2206   }
2207   if (mbs) {
2208     fshift += imax[mbs - 1] - ailen[mbs - 1];
2209     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2210   }
2211 
2212   /* reset ilen and imax for each row */
2213   a->nonzerorowcnt = 0;
2214   if (A->structure_only) {
2215     PetscCall(PetscFree2(a->imax, a->ilen));
2216   } else { /* !A->structure_only */
2217     for (i = 0; i < mbs; i++) {
2218       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2219       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2220     }
2221   }
2222   a->nz = ai[mbs];
2223 
2224   /* diagonals may have moved, so kill the diagonal pointers */
2225   a->idiagvalid = PETSC_FALSE;
2226   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2227   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2228   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2229   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2230 
2231   A->info.mallocs += a->reallocs;
2232   a->reallocs         = 0;
2233   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2234   a->rmax             = rmax;
2235 
2236   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2237   PetscFunctionReturn(PETSC_SUCCESS);
2238 }
2239 
2240 /*
2241    This function returns an array of flags which indicate the locations of contiguous
2242    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2243    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2244    Assume: sizes should be long enough to hold all the values.
2245 */
2246 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2247 {
2248   PetscInt j = 0;
2249 
2250   PetscFunctionBegin;
2251   for (PetscInt i = 0; i < n; j++) {
2252     PetscInt row = idx[i];
2253     if (row % bs != 0) { /* Not the beginning of a block */
2254       sizes[j] = 1;
2255       i++;
2256     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2257       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2258       i++;
2259     } else { /* Beginning of the block, so check if the complete block exists */
2260       PetscBool flg = PETSC_TRUE;
2261       for (PetscInt k = 1; k < bs; k++) {
2262         if (row + k != idx[i + k]) { /* break in the block */
2263           flg = PETSC_FALSE;
2264           break;
2265         }
2266       }
2267       if (flg) { /* No break in the bs */
2268         sizes[j] = bs;
2269         i += bs;
2270       } else {
2271         sizes[j] = 1;
2272         i++;
2273       }
2274     }
2275   }
2276   *bs_max = j;
2277   PetscFunctionReturn(PETSC_SUCCESS);
2278 }
2279 
2280 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2281 {
2282   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2283   PetscInt           i, j, k, count, *rows;
2284   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2285   PetscScalar        zero = 0.0;
2286   MatScalar         *aa;
2287   const PetscScalar *xx;
2288   PetscScalar       *bb;
2289 
2290   PetscFunctionBegin;
2291   /* fix right-hand side if needed */
2292   if (x && b) {
2293     PetscCall(VecGetArrayRead(x, &xx));
2294     PetscCall(VecGetArray(b, &bb));
2295     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2296     PetscCall(VecRestoreArrayRead(x, &xx));
2297     PetscCall(VecRestoreArray(b, &bb));
2298   }
2299 
2300   /* Make a copy of the IS and  sort it */
2301   /* allocate memory for rows,sizes */
2302   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2303 
2304   /* copy IS values to rows, and sort them */
2305   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2306   PetscCall(PetscSortInt(is_n, rows));
2307 
2308   if (baij->keepnonzeropattern) {
2309     for (i = 0; i < is_n; i++) sizes[i] = 1;
2310     bs_max = is_n;
2311   } else {
2312     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2313     A->nonzerostate++;
2314   }
2315 
2316   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2317     row = rows[j];
2318     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2319     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2320     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2321     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2322       if (diag != (PetscScalar)0.0) {
2323         if (baij->ilen[row / bs] > 0) {
2324           baij->ilen[row / bs]       = 1;
2325           baij->j[baij->i[row / bs]] = row / bs;
2326 
2327           PetscCall(PetscArrayzero(aa, count * bs));
2328         }
2329         /* Now insert all the diagonal values for this bs */
2330         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2331       } else { /* (diag == 0.0) */
2332         baij->ilen[row / bs] = 0;
2333       } /* end (diag == 0.0) */
2334     } else { /* (sizes[i] != bs) */
2335       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2336       for (k = 0; k < count; k++) {
2337         aa[0] = zero;
2338         aa += bs;
2339       }
2340       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2341     }
2342   }
2343 
2344   PetscCall(PetscFree2(rows, sizes));
2345   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2346   PetscFunctionReturn(PETSC_SUCCESS);
2347 }
2348 
2349 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2350 {
2351   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2352   PetscInt           i, j, k, count;
2353   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2354   PetscScalar        zero = 0.0;
2355   MatScalar         *aa;
2356   const PetscScalar *xx;
2357   PetscScalar       *bb;
2358   PetscBool         *zeroed, vecs = PETSC_FALSE;
2359 
2360   PetscFunctionBegin;
2361   /* fix right-hand side if needed */
2362   if (x && b) {
2363     PetscCall(VecGetArrayRead(x, &xx));
2364     PetscCall(VecGetArray(b, &bb));
2365     vecs = PETSC_TRUE;
2366   }
2367 
2368   /* zero the columns */
2369   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2370   for (i = 0; i < is_n; i++) {
2371     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2372     zeroed[is_idx[i]] = PETSC_TRUE;
2373   }
2374   for (i = 0; i < A->rmap->N; i++) {
2375     if (!zeroed[i]) {
2376       row = i / bs;
2377       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2378         for (k = 0; k < bs; k++) {
2379           col = bs * baij->j[j] + k;
2380           if (zeroed[col]) {
2381             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2382             if (vecs) bb[i] -= aa[0] * xx[col];
2383             aa[0] = 0.0;
2384           }
2385         }
2386       }
2387     } else if (vecs) bb[i] = diag * xx[i];
2388   }
2389   PetscCall(PetscFree(zeroed));
2390   if (vecs) {
2391     PetscCall(VecRestoreArrayRead(x, &xx));
2392     PetscCall(VecRestoreArray(b, &bb));
2393   }
2394 
2395   /* zero the rows */
2396   for (i = 0; i < is_n; i++) {
2397     row   = is_idx[i];
2398     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2399     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2400     for (k = 0; k < count; k++) {
2401       aa[0] = zero;
2402       aa += bs;
2403     }
2404     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2405   }
2406   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2407   PetscFunctionReturn(PETSC_SUCCESS);
2408 }
2409 
2410 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2411 {
2412   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2413   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2414   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2415   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2416   PetscInt     ridx, cidx, bs2                 = a->bs2;
2417   PetscBool    roworiented = a->roworiented;
2418   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2419 
2420   PetscFunctionBegin;
2421   for (k = 0; k < m; k++) { /* loop over added rows */
2422     row  = im[k];
2423     brow = row / bs;
2424     if (row < 0) continue;
2425     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2426     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2427     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2428     rmax = imax[brow];
2429     nrow = ailen[brow];
2430     low  = 0;
2431     high = nrow;
2432     for (l = 0; l < n; l++) { /* loop over added columns */
2433       if (in[l] < 0) continue;
2434       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2435       col  = in[l];
2436       bcol = col / bs;
2437       ridx = row % bs;
2438       cidx = col % bs;
2439       if (!A->structure_only) {
2440         if (roworiented) {
2441           value = v[l + k * n];
2442         } else {
2443           value = v[k + l * m];
2444         }
2445       }
2446       if (col <= lastcol) low = 0;
2447       else high = nrow;
2448       lastcol = col;
2449       while (high - low > 7) {
2450         t = (low + high) / 2;
2451         if (rp[t] > bcol) high = t;
2452         else low = t;
2453       }
2454       for (i = low; i < high; i++) {
2455         if (rp[i] > bcol) break;
2456         if (rp[i] == bcol) {
2457           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2458           if (!A->structure_only) {
2459             if (is == ADD_VALUES) *bap += value;
2460             else *bap = value;
2461           }
2462           goto noinsert1;
2463         }
2464       }
2465       if (nonew == 1) goto noinsert1;
2466       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2467       if (A->structure_only) {
2468         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2469       } else {
2470         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2471       }
2472       N = nrow++ - 1;
2473       high++;
2474       /* shift up all the later entries in this row */
2475       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2476       rp[i] = bcol;
2477       if (!A->structure_only) {
2478         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2479         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2480         ap[bs2 * i + bs * cidx + ridx] = value;
2481       }
2482       a->nz++;
2483     noinsert1:;
2484       low = i;
2485     }
2486     ailen[brow] = nrow;
2487   }
2488   PetscFunctionReturn(PETSC_SUCCESS);
2489 }
2490 
2491 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2492 {
2493   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2494   Mat          outA;
2495   PetscBool    row_identity, col_identity;
2496 
2497   PetscFunctionBegin;
2498   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2499   PetscCall(ISIdentity(row, &row_identity));
2500   PetscCall(ISIdentity(col, &col_identity));
2501   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2502 
2503   outA            = inA;
2504   inA->factortype = MAT_FACTOR_LU;
2505   PetscCall(PetscFree(inA->solvertype));
2506   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2507 
2508   PetscCall(PetscObjectReference((PetscObject)row));
2509   PetscCall(ISDestroy(&a->row));
2510   a->row = row;
2511   PetscCall(PetscObjectReference((PetscObject)col));
2512   PetscCall(ISDestroy(&a->col));
2513   a->col = col;
2514 
2515   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2516   PetscCall(ISDestroy(&a->icol));
2517   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2518 
2519   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2520   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2521   PetscCall(MatLUFactorNumeric(outA, inA, info));
2522   PetscFunctionReturn(PETSC_SUCCESS);
2523 }
2524 
2525 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2526 {
2527   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2528 
2529   PetscFunctionBegin;
2530   baij->nz = baij->maxnz;
2531   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2532   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2533   PetscFunctionReturn(PETSC_SUCCESS);
2534 }
2535 
2536 /*@
2537   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2538 
2539   Input Parameters:
2540 + mat     - the `MATSEQBAIJ` matrix
2541 - indices - the block column indices
2542 
2543   Level: advanced
2544 
2545   Notes:
2546   This can be called if you have precomputed the nonzero structure of the
2547   matrix and want to provide it to the matrix object to improve the performance
2548   of the `MatSetValues()` operation.
2549 
2550   You MUST have set the correct numbers of nonzeros per row in the call to
2551   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2552 
2553   MUST be called before any calls to `MatSetValues()`
2554 
2555 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2556 @*/
2557 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2558 {
2559   PetscFunctionBegin;
2560   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2561   PetscAssertPointer(indices, 2);
2562   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2567 {
2568   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2569   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2570   PetscReal    atmp;
2571   PetscScalar *x, zero = 0.0;
2572   MatScalar   *aa;
2573   PetscInt     ncols, brow, krow, kcol;
2574 
2575   PetscFunctionBegin;
2576   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2577   bs  = A->rmap->bs;
2578   aa  = a->a;
2579   ai  = a->i;
2580   aj  = a->j;
2581   mbs = a->mbs;
2582 
2583   PetscCall(VecSet(v, zero));
2584   PetscCall(VecGetArray(v, &x));
2585   PetscCall(VecGetLocalSize(v, &n));
2586   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2587   for (i = 0; i < mbs; i++) {
2588     ncols = ai[1] - ai[0];
2589     ai++;
2590     brow = bs * i;
2591     for (j = 0; j < ncols; j++) {
2592       for (kcol = 0; kcol < bs; kcol++) {
2593         for (krow = 0; krow < bs; krow++) {
2594           atmp = PetscAbsScalar(*aa);
2595           aa++;
2596           row = brow + krow; /* row index */
2597           if (PetscAbsScalar(x[row]) < atmp) {
2598             x[row] = atmp;
2599             if (idx) idx[row] = bs * (*aj) + kcol;
2600           }
2601         }
2602       }
2603       aj++;
2604     }
2605   }
2606   PetscCall(VecRestoreArray(v, &x));
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
2609 
2610 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2611 {
2612   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2613   PetscInt     i, j, n, row, bs, *ai, mbs;
2614   PetscReal    atmp;
2615   PetscScalar *x, zero = 0.0;
2616   MatScalar   *aa;
2617   PetscInt     ncols, brow, krow, kcol;
2618 
2619   PetscFunctionBegin;
2620   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2621   bs  = A->rmap->bs;
2622   aa  = a->a;
2623   ai  = a->i;
2624   mbs = a->mbs;
2625 
2626   PetscCall(VecSet(v, zero));
2627   PetscCall(VecGetArrayWrite(v, &x));
2628   PetscCall(VecGetLocalSize(v, &n));
2629   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2630   for (i = 0; i < mbs; i++) {
2631     ncols = ai[1] - ai[0];
2632     ai++;
2633     brow = bs * i;
2634     for (j = 0; j < ncols; j++) {
2635       for (kcol = 0; kcol < bs; kcol++) {
2636         for (krow = 0; krow < bs; krow++) {
2637           atmp = PetscAbsScalar(*aa);
2638           aa++;
2639           row = brow + krow; /* row index */
2640           x[row] += atmp;
2641         }
2642       }
2643     }
2644   }
2645   PetscCall(VecRestoreArrayWrite(v, &x));
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2650 {
2651   PetscFunctionBegin;
2652   /* If the two matrices have the same copy implementation, use fast copy. */
2653   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2654     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2655     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2656     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2657 
2658     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2659     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2660     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2661     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2662   } else {
2663     PetscCall(MatCopy_Basic(A, B, str));
2664   }
2665   PetscFunctionReturn(PETSC_SUCCESS);
2666 }
2667 
2668 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2669 {
2670   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2671 
2672   PetscFunctionBegin;
2673   *array = a->a;
2674   PetscFunctionReturn(PETSC_SUCCESS);
2675 }
2676 
2677 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2678 {
2679   PetscFunctionBegin;
2680   *array = NULL;
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
2684 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2685 {
2686   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2687   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2688   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2689 
2690   PetscFunctionBegin;
2691   /* Set the number of nonzeros in the new matrix */
2692   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2693   PetscFunctionReturn(PETSC_SUCCESS);
2694 }
2695 
2696 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2697 {
2698   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2699   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2700   PetscBLASInt one = 1;
2701 
2702   PetscFunctionBegin;
2703   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2704     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2705     if (e) {
2706       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2707       if (e) {
2708         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2709         if (e) str = SAME_NONZERO_PATTERN;
2710       }
2711     }
2712     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2713   }
2714   if (str == SAME_NONZERO_PATTERN) {
2715     PetscScalar  alpha = a;
2716     PetscBLASInt bnz;
2717     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2718     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2719     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2720   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2721     PetscCall(MatAXPY_Basic(Y, a, X, str));
2722   } else {
2723     Mat       B;
2724     PetscInt *nnz;
2725     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2726     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2727     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2728     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2729     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2730     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2731     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2732     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2733     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2734     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2735     PetscCall(MatHeaderMerge(Y, &B));
2736     PetscCall(PetscFree(nnz));
2737   }
2738   PetscFunctionReturn(PETSC_SUCCESS);
2739 }
2740 
2741 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2742 {
2743 #if PetscDefined(USE_COMPLEX)
2744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2745   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2746   MatScalar   *aa = a->a;
2747 
2748   PetscFunctionBegin;
2749   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2750   PetscFunctionReturn(PETSC_SUCCESS);
2751 #else
2752   (void)A;
2753   return PETSC_SUCCESS;
2754 #endif
2755 }
2756 
2757 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2758 {
2759 #if PetscDefined(USE_COMPLEX)
2760   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2761   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2762   MatScalar   *aa = a->a;
2763 
2764   PetscFunctionBegin;
2765   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2766   PetscFunctionReturn(PETSC_SUCCESS);
2767 #else
2768   (void)A;
2769   return PETSC_SUCCESS;
2770 #endif
2771 }
2772 
2773 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2774 {
2775 #if PetscDefined(USE_COMPLEX)
2776   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2777   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2778   MatScalar   *aa = a->a;
2779 
2780   PetscFunctionBegin;
2781   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2782   PetscFunctionReturn(PETSC_SUCCESS);
2783 #else
2784   (void)A;
2785   return PETSC_SUCCESS;
2786 #endif
2787 }
2788 
2789 /*
2790     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2791 */
2792 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2793 {
2794   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2795   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2796   PetscInt     nz = a->i[m], row, *jj, mr, col;
2797 
2798   PetscFunctionBegin;
2799   *nn = n;
2800   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2801   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2802   PetscCall(PetscCalloc1(n, &collengths));
2803   PetscCall(PetscMalloc1(n + 1, &cia));
2804   PetscCall(PetscMalloc1(nz, &cja));
2805   jj = a->j;
2806   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2807   cia[0] = oshift;
2808   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2809   PetscCall(PetscArrayzero(collengths, n));
2810   jj = a->j;
2811   for (row = 0; row < m; row++) {
2812     mr = a->i[row + 1] - a->i[row];
2813     for (i = 0; i < mr; i++) {
2814       col = *jj++;
2815 
2816       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2817     }
2818   }
2819   PetscCall(PetscFree(collengths));
2820   *ia = cia;
2821   *ja = cja;
2822   PetscFunctionReturn(PETSC_SUCCESS);
2823 }
2824 
2825 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2826 {
2827   PetscFunctionBegin;
2828   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2829   PetscCall(PetscFree(*ia));
2830   PetscCall(PetscFree(*ja));
2831   PetscFunctionReturn(PETSC_SUCCESS);
2832 }
2833 
2834 /*
2835  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2836  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2837  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2838  */
2839 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2840 {
2841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2842   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2843   PetscInt     nz = a->i[m], row, *jj, mr, col;
2844   PetscInt    *cspidx;
2845 
2846   PetscFunctionBegin;
2847   *nn = n;
2848   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2849 
2850   PetscCall(PetscCalloc1(n, &collengths));
2851   PetscCall(PetscMalloc1(n + 1, &cia));
2852   PetscCall(PetscMalloc1(nz, &cja));
2853   PetscCall(PetscMalloc1(nz, &cspidx));
2854   jj = a->j;
2855   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2856   cia[0] = oshift;
2857   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2858   PetscCall(PetscArrayzero(collengths, n));
2859   jj = a->j;
2860   for (row = 0; row < m; row++) {
2861     mr = a->i[row + 1] - a->i[row];
2862     for (i = 0; i < mr; i++) {
2863       col                                         = *jj++;
2864       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2865       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2866     }
2867   }
2868   PetscCall(PetscFree(collengths));
2869   *ia    = cia;
2870   *ja    = cja;
2871   *spidx = cspidx;
2872   PetscFunctionReturn(PETSC_SUCCESS);
2873 }
2874 
2875 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2876 {
2877   PetscFunctionBegin;
2878   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2879   PetscCall(PetscFree(*spidx));
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882 
2883 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2884 {
2885   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2886 
2887   PetscFunctionBegin;
2888   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2889   PetscCall(MatShift_Basic(Y, a));
2890   PetscFunctionReturn(PETSC_SUCCESS);
2891 }
2892 
2893 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2894 {
2895   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2896   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2897   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2898   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2899   MatScalar   *aa = a->a, *ap;
2900   PetscBool    zero;
2901 
2902   PetscFunctionBegin;
2903   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2904   if (m) rmax = ailen[0];
2905   for (i = 1; i <= mbs; i++) {
2906     for (k = ai[i - 1]; k < ai[i]; k++) {
2907       zero = PETSC_TRUE;
2908       ap   = aa + bs2 * k;
2909       for (j = 0; j < bs2 && zero; j++) {
2910         if (ap[j] != 0.0) zero = PETSC_FALSE;
2911       }
2912       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2913       else {
2914         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2915         aj[k - fshift] = aj[k];
2916         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2917       }
2918     }
2919     ai[i - 1] -= fshift_prev;
2920     fshift_prev  = fshift;
2921     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2922     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2923     rmax = PetscMax(rmax, ailen[i - 1]);
2924   }
2925   if (fshift) {
2926     if (mbs) {
2927       ai[mbs] -= fshift;
2928       a->nz = ai[mbs];
2929     }
2930     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2931     A->nonzerostate++;
2932     A->info.nz_unneeded += (PetscReal)fshift;
2933     a->rmax = rmax;
2934     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2935     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2936   }
2937   PetscFunctionReturn(PETSC_SUCCESS);
2938 }
2939 
2940 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2941                                        MatGetRow_SeqBAIJ,
2942                                        MatRestoreRow_SeqBAIJ,
2943                                        MatMult_SeqBAIJ_N,
2944                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2945                                        MatMultTranspose_SeqBAIJ,
2946                                        MatMultTransposeAdd_SeqBAIJ,
2947                                        NULL,
2948                                        NULL,
2949                                        NULL,
2950                                        /* 10*/ NULL,
2951                                        MatLUFactor_SeqBAIJ,
2952                                        NULL,
2953                                        NULL,
2954                                        MatTranspose_SeqBAIJ,
2955                                        /* 15*/ MatGetInfo_SeqBAIJ,
2956                                        MatEqual_SeqBAIJ,
2957                                        MatGetDiagonal_SeqBAIJ,
2958                                        MatDiagonalScale_SeqBAIJ,
2959                                        MatNorm_SeqBAIJ,
2960                                        /* 20*/ NULL,
2961                                        MatAssemblyEnd_SeqBAIJ,
2962                                        MatSetOption_SeqBAIJ,
2963                                        MatZeroEntries_SeqBAIJ,
2964                                        /* 24*/ MatZeroRows_SeqBAIJ,
2965                                        NULL,
2966                                        NULL,
2967                                        NULL,
2968                                        NULL,
2969                                        /* 29*/ MatSetUp_Seq_Hash,
2970                                        NULL,
2971                                        NULL,
2972                                        NULL,
2973                                        NULL,
2974                                        /* 34*/ MatDuplicate_SeqBAIJ,
2975                                        NULL,
2976                                        NULL,
2977                                        MatILUFactor_SeqBAIJ,
2978                                        NULL,
2979                                        /* 39*/ MatAXPY_SeqBAIJ,
2980                                        MatCreateSubMatrices_SeqBAIJ,
2981                                        MatIncreaseOverlap_SeqBAIJ,
2982                                        MatGetValues_SeqBAIJ,
2983                                        MatCopy_SeqBAIJ,
2984                                        /* 44*/ NULL,
2985                                        MatScale_SeqBAIJ,
2986                                        MatShift_SeqBAIJ,
2987                                        NULL,
2988                                        MatZeroRowsColumns_SeqBAIJ,
2989                                        /* 49*/ NULL,
2990                                        MatGetRowIJ_SeqBAIJ,
2991                                        MatRestoreRowIJ_SeqBAIJ,
2992                                        MatGetColumnIJ_SeqBAIJ,
2993                                        MatRestoreColumnIJ_SeqBAIJ,
2994                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2995                                        NULL,
2996                                        NULL,
2997                                        NULL,
2998                                        MatSetValuesBlocked_SeqBAIJ,
2999                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3000                                        MatDestroy_SeqBAIJ,
3001                                        MatView_SeqBAIJ,
3002                                        NULL,
3003                                        NULL,
3004                                        /* 64*/ NULL,
3005                                        NULL,
3006                                        NULL,
3007                                        NULL,
3008                                        MatGetRowMaxAbs_SeqBAIJ,
3009                                        /* 69*/ NULL,
3010                                        MatConvert_Basic,
3011                                        NULL,
3012                                        MatFDColoringApply_BAIJ,
3013                                        NULL,
3014                                        /* 74*/ NULL,
3015                                        NULL,
3016                                        NULL,
3017                                        NULL,
3018                                        MatLoad_SeqBAIJ,
3019                                        /* 79*/ NULL,
3020                                        NULL,
3021                                        NULL,
3022                                        NULL,
3023                                        NULL,
3024                                        /* 84*/ NULL,
3025                                        NULL,
3026                                        NULL,
3027                                        NULL,
3028                                        NULL,
3029                                        /* 89*/ NULL,
3030                                        NULL,
3031                                        NULL,
3032                                        NULL,
3033                                        MatConjugate_SeqBAIJ,
3034                                        /* 94*/ NULL,
3035                                        NULL,
3036                                        MatRealPart_SeqBAIJ,
3037                                        MatImaginaryPart_SeqBAIJ,
3038                                        NULL,
3039                                        /* 99*/ NULL,
3040                                        NULL,
3041                                        NULL,
3042                                        NULL,
3043                                        NULL,
3044                                        /*104*/ NULL,
3045                                        NULL,
3046                                        NULL,
3047                                        NULL,
3048                                        NULL,
3049                                        /*109*/ NULL,
3050                                        NULL,
3051                                        MatMultHermitianTranspose_SeqBAIJ,
3052                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3053                                        NULL,
3054                                        /*114*/ NULL,
3055                                        MatGetColumnReductions_SeqBAIJ,
3056                                        MatInvertBlockDiagonal_SeqBAIJ,
3057                                        NULL,
3058                                        NULL,
3059                                        /*119*/ NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        NULL,
3064                                        /*124*/ NULL,
3065                                        NULL,
3066                                        MatSetBlockSizes_Default,
3067                                        NULL,
3068                                        MatFDColoringSetUp_SeqXAIJ,
3069                                        /*129*/ NULL,
3070                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3071                                        MatDestroySubMatrices_SeqBAIJ,
3072                                        NULL,
3073                                        NULL,
3074                                        /*134*/ NULL,
3075                                        NULL,
3076                                        MatEliminateZeros_SeqBAIJ,
3077                                        MatGetRowSumAbs_SeqBAIJ,
3078                                        NULL,
3079                                        /*139*/ NULL,
3080                                        NULL,
3081                                        MatCopyHashToXAIJ_Seq_Hash,
3082                                        NULL,
3083                                        NULL};
3084 
3085 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3086 {
3087   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3088   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3089 
3090   PetscFunctionBegin;
3091   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3092 
3093   /* allocate space for values if not already there */
3094   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3095 
3096   /* copy values over */
3097   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3098   PetscFunctionReturn(PETSC_SUCCESS);
3099 }
3100 
3101 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3102 {
3103   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3104   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3105 
3106   PetscFunctionBegin;
3107   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3108   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3109 
3110   /* copy values over */
3111   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3112   PetscFunctionReturn(PETSC_SUCCESS);
3113 }
3114 
3115 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3116 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3117 
3118 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3119 {
3120   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3121   PetscInt     i, mbs, nbs, bs2;
3122   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3123 
3124   PetscFunctionBegin;
3125   if (B->hash_active) {
3126     PetscInt bs;
3127     B->ops[0] = b->cops;
3128     PetscCall(PetscHMapIJVDestroy(&b->ht));
3129     PetscCall(MatGetBlockSize(B, &bs));
3130     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3131     PetscCall(PetscFree(b->dnz));
3132     PetscCall(PetscFree(b->bdnz));
3133     B->hash_active = PETSC_FALSE;
3134   }
3135   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3136   if (nz == MAT_SKIP_ALLOCATION) {
3137     skipallocation = PETSC_TRUE;
3138     nz             = 0;
3139   }
3140 
3141   PetscCall(MatSetBlockSize(B, bs));
3142   PetscCall(PetscLayoutSetUp(B->rmap));
3143   PetscCall(PetscLayoutSetUp(B->cmap));
3144   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3145 
3146   B->preallocated = PETSC_TRUE;
3147 
3148   mbs = B->rmap->n / bs;
3149   nbs = B->cmap->n / bs;
3150   bs2 = bs * bs;
3151 
3152   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3153 
3154   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3155   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3156   if (nnz) {
3157     for (i = 0; i < mbs; i++) {
3158       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3159       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3160     }
3161   }
3162 
3163   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3164   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3165   PetscOptionsEnd();
3166 
3167   if (!flg) {
3168     switch (bs) {
3169     case 1:
3170       B->ops->mult    = MatMult_SeqBAIJ_1;
3171       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3172       break;
3173     case 2:
3174       B->ops->mult    = MatMult_SeqBAIJ_2;
3175       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3176       break;
3177     case 3:
3178       B->ops->mult    = MatMult_SeqBAIJ_3;
3179       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3180       break;
3181     case 4:
3182       B->ops->mult    = MatMult_SeqBAIJ_4;
3183       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3184       break;
3185     case 5:
3186       B->ops->mult    = MatMult_SeqBAIJ_5;
3187       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3188       break;
3189     case 6:
3190       B->ops->mult    = MatMult_SeqBAIJ_6;
3191       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3192       break;
3193     case 7:
3194       B->ops->mult    = MatMult_SeqBAIJ_7;
3195       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3196       break;
3197     case 9: {
3198       PetscInt version = 1;
3199       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3200       switch (version) {
3201 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3202       case 1:
3203         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3204         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3205         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3206         break;
3207 #endif
3208       default:
3209         B->ops->mult    = MatMult_SeqBAIJ_N;
3210         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3211         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3212         break;
3213       }
3214       break;
3215     }
3216     case 11:
3217       B->ops->mult    = MatMult_SeqBAIJ_11;
3218       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3219       break;
3220     case 12: {
3221       PetscInt version = 1;
3222       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3223       switch (version) {
3224       case 1:
3225         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3226         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3227         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3228         break;
3229       case 2:
3230         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3231         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3232         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3233         break;
3234 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3235       case 3:
3236         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3237         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3238         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3239         break;
3240 #endif
3241       default:
3242         B->ops->mult    = MatMult_SeqBAIJ_N;
3243         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3244         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3245         break;
3246       }
3247       break;
3248     }
3249     case 15: {
3250       PetscInt version = 1;
3251       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3252       switch (version) {
3253       case 1:
3254         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3255         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3256         break;
3257       case 2:
3258         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3259         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3260         break;
3261       case 3:
3262         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3263         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3264         break;
3265       case 4:
3266         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3267         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3268         break;
3269       default:
3270         B->ops->mult = MatMult_SeqBAIJ_N;
3271         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3272         break;
3273       }
3274       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3275       break;
3276     }
3277     default:
3278       B->ops->mult    = MatMult_SeqBAIJ_N;
3279       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3280       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3281       break;
3282     }
3283   }
3284   B->ops->sor = MatSOR_SeqBAIJ;
3285   b->mbs      = mbs;
3286   b->nbs      = nbs;
3287   if (!skipallocation) {
3288     if (!b->imax) {
3289       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3290 
3291       b->free_imax_ilen = PETSC_TRUE;
3292     }
3293     /* b->ilen will count nonzeros in each block row so far. */
3294     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3295     if (!nnz) {
3296       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3297       else if (nz < 0) nz = 1;
3298       nz = PetscMin(nz, nbs);
3299       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3300       PetscCall(PetscIntMultError(nz, mbs, &nz));
3301     } else {
3302       PetscInt64 nz64 = 0;
3303       for (i = 0; i < mbs; i++) {
3304         b->imax[i] = nnz[i];
3305         nz64 += nnz[i];
3306       }
3307       PetscCall(PetscIntCast(nz64, &nz));
3308     }
3309 
3310     /* allocate the matrix space */
3311     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3312     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3313     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3314     if (B->structure_only) {
3315       b->free_a = PETSC_FALSE;
3316     } else {
3317       PetscInt nzbs2 = 0;
3318       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3319       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3320       b->free_a = PETSC_TRUE;
3321       PetscCall(PetscArrayzero(b->a, nz * bs2));
3322     }
3323     b->free_ij = PETSC_TRUE;
3324     PetscCall(PetscArrayzero(b->j, nz));
3325 
3326     b->i[0] = 0;
3327     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3328   } else {
3329     b->free_a  = PETSC_FALSE;
3330     b->free_ij = PETSC_FALSE;
3331   }
3332 
3333   b->bs2              = bs2;
3334   b->mbs              = mbs;
3335   b->nz               = 0;
3336   b->maxnz            = nz;
3337   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3338   B->was_assembled    = PETSC_FALSE;
3339   B->assembled        = PETSC_FALSE;
3340   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3341   PetscFunctionReturn(PETSC_SUCCESS);
3342 }
3343 
3344 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3345 {
3346   PetscInt     i, m, nz, nz_max = 0, *nnz;
3347   PetscScalar *values      = NULL;
3348   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3349 
3350   PetscFunctionBegin;
3351   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3352   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3353   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3354   PetscCall(PetscLayoutSetUp(B->rmap));
3355   PetscCall(PetscLayoutSetUp(B->cmap));
3356   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3357   m = B->rmap->n / bs;
3358 
3359   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3360   PetscCall(PetscMalloc1(m + 1, &nnz));
3361   for (i = 0; i < m; i++) {
3362     nz = ii[i + 1] - ii[i];
3363     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3364     nz_max = PetscMax(nz_max, nz);
3365     nnz[i] = nz;
3366   }
3367   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3368   PetscCall(PetscFree(nnz));
3369 
3370   values = (PetscScalar *)V;
3371   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3372   for (i = 0; i < m; i++) {
3373     PetscInt        ncols = ii[i + 1] - ii[i];
3374     const PetscInt *icols = jj + ii[i];
3375     if (bs == 1 || !roworiented) {
3376       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3377       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3378     } else {
3379       PetscInt j;
3380       for (j = 0; j < ncols; j++) {
3381         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3382         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3383       }
3384     }
3385   }
3386   if (!V) PetscCall(PetscFree(values));
3387   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3388   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3389   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3390   PetscFunctionReturn(PETSC_SUCCESS);
3391 }
3392 
3393 /*@C
3394   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3395 
3396   Not Collective
3397 
3398   Input Parameter:
3399 . A - a `MATSEQBAIJ` matrix
3400 
3401   Output Parameter:
3402 . array - pointer to the data
3403 
3404   Level: intermediate
3405 
3406 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3407 @*/
3408 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3409 {
3410   PetscFunctionBegin;
3411   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3412   PetscFunctionReturn(PETSC_SUCCESS);
3413 }
3414 
3415 /*@C
3416   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3417 
3418   Not Collective
3419 
3420   Input Parameters:
3421 + A     - a `MATSEQBAIJ` matrix
3422 - array - pointer to the data
3423 
3424   Level: intermediate
3425 
3426 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3427 @*/
3428 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3429 {
3430   PetscFunctionBegin;
3431   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3432   PetscFunctionReturn(PETSC_SUCCESS);
3433 }
3434 
3435 /*MC
3436    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3437    block sparse compressed row format.
3438 
3439    Options Database Keys:
3440 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3441 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3442 
3443    Level: beginner
3444 
3445    Notes:
3446    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3447    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3448 
3449    Run with `-info` to see what version of the matrix-vector product is being used
3450 
3451 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3452 M*/
3453 
3454 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3455 
3456 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3457 {
3458   PetscMPIInt  size;
3459   Mat_SeqBAIJ *b;
3460 
3461   PetscFunctionBegin;
3462   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3463   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3464 
3465   PetscCall(PetscNew(&b));
3466   B->data   = (void *)b;
3467   B->ops[0] = MatOps_Values;
3468 
3469   b->row          = NULL;
3470   b->col          = NULL;
3471   b->icol         = NULL;
3472   b->reallocs     = 0;
3473   b->saved_values = NULL;
3474 
3475   b->roworiented        = PETSC_TRUE;
3476   b->nonew              = 0;
3477   b->diag               = NULL;
3478   B->spptr              = NULL;
3479   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3480   b->keepnonzeropattern = PETSC_FALSE;
3481 
3482   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3483   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3484   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3485   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3486   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3487   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3488   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3489   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3490   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3491   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3492 #if defined(PETSC_HAVE_HYPRE)
3493   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3494 #endif
3495   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3496   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3497   PetscFunctionReturn(PETSC_SUCCESS);
3498 }
3499 
3500 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3501 {
3502   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3503   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3504 
3505   PetscFunctionBegin;
3506   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3507   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3508 
3509   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3510     c->imax           = a->imax;
3511     c->ilen           = a->ilen;
3512     c->free_imax_ilen = PETSC_FALSE;
3513   } else {
3514     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3515     for (i = 0; i < mbs; i++) {
3516       c->imax[i] = a->imax[i];
3517       c->ilen[i] = a->ilen[i];
3518     }
3519     c->free_imax_ilen = PETSC_TRUE;
3520   }
3521 
3522   /* allocate the matrix space */
3523   if (mallocmatspace) {
3524     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3525       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3526       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3527       c->free_a       = PETSC_TRUE;
3528       c->i            = a->i;
3529       c->j            = a->j;
3530       c->free_ij      = PETSC_FALSE;
3531       c->parent       = A;
3532       C->preallocated = PETSC_TRUE;
3533       C->assembled    = PETSC_TRUE;
3534 
3535       PetscCall(PetscObjectReference((PetscObject)A));
3536       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3537       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3538     } else {
3539       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3540       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3541       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3542       c->free_a  = PETSC_TRUE;
3543       c->free_ij = PETSC_TRUE;
3544 
3545       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3546       if (mbs > 0) {
3547         PetscCall(PetscArraycpy(c->j, a->j, nz));
3548         if (cpvalues == MAT_COPY_VALUES) {
3549           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3550         } else {
3551           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3552         }
3553       }
3554       C->preallocated = PETSC_TRUE;
3555       C->assembled    = PETSC_TRUE;
3556     }
3557   }
3558 
3559   c->roworiented = a->roworiented;
3560   c->nonew       = a->nonew;
3561 
3562   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3563   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3564 
3565   c->bs2        = a->bs2;
3566   c->mbs        = a->mbs;
3567   c->nbs        = a->nbs;
3568   c->nz         = a->nz;
3569   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3570   c->solve_work = NULL;
3571   c->mult_work  = NULL;
3572   c->sor_workt  = NULL;
3573   c->sor_work   = NULL;
3574 
3575   c->compressedrow.use   = a->compressedrow.use;
3576   c->compressedrow.nrows = a->compressedrow.nrows;
3577   if (a->compressedrow.use) {
3578     i = a->compressedrow.nrows;
3579     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3580     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3581     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3582   } else {
3583     c->compressedrow.use    = PETSC_FALSE;
3584     c->compressedrow.i      = NULL;
3585     c->compressedrow.rindex = NULL;
3586   }
3587   c->nonzerorowcnt = a->nonzerorowcnt;
3588   C->nonzerostate  = A->nonzerostate;
3589 
3590   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3591   PetscFunctionReturn(PETSC_SUCCESS);
3592 }
3593 
3594 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3595 {
3596   PetscFunctionBegin;
3597   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3598   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3599   PetscCall(MatSetType(*B, MATSEQBAIJ));
3600   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3601   PetscFunctionReturn(PETSC_SUCCESS);
3602 }
3603 
3604 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3605 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3606 {
3607   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3608   PetscInt    *rowidxs, *colidxs;
3609   PetscScalar *matvals;
3610 
3611   PetscFunctionBegin;
3612   PetscCall(PetscViewerSetUp(viewer));
3613 
3614   /* read matrix header */
3615   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3616   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3617   M  = header[1];
3618   N  = header[2];
3619   nz = header[3];
3620   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3621   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3622   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3623 
3624   /* set block sizes from the viewer's .info file */
3625   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3626   /* set local and global sizes if not set already */
3627   if (mat->rmap->n < 0) mat->rmap->n = M;
3628   if (mat->cmap->n < 0) mat->cmap->n = N;
3629   if (mat->rmap->N < 0) mat->rmap->N = M;
3630   if (mat->cmap->N < 0) mat->cmap->N = N;
3631   PetscCall(PetscLayoutSetUp(mat->rmap));
3632   PetscCall(PetscLayoutSetUp(mat->cmap));
3633 
3634   /* check if the matrix sizes are correct */
3635   PetscCall(MatGetSize(mat, &rows, &cols));
3636   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3637   PetscCall(MatGetBlockSize(mat, &bs));
3638   PetscCall(MatGetLocalSize(mat, &m, &n));
3639   mbs = m / bs;
3640   nbs = n / bs;
3641 
3642   /* read in row lengths, column indices and nonzero values */
3643   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3644   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3645   rowidxs[0] = 0;
3646   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3647   sum = rowidxs[m];
3648   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3649 
3650   /* read in column indices and nonzero values */
3651   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3652   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3653   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3654 
3655   {               /* preallocate matrix storage */
3656     PetscBT   bt; /* helper bit set to count nonzeros */
3657     PetscInt *nnz;
3658     PetscBool sbaij;
3659 
3660     PetscCall(PetscBTCreate(nbs, &bt));
3661     PetscCall(PetscCalloc1(mbs, &nnz));
3662     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3663     for (i = 0; i < mbs; i++) {
3664       PetscCall(PetscBTMemzero(nbs, bt));
3665       for (k = 0; k < bs; k++) {
3666         PetscInt row = bs * i + k;
3667         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3668           PetscInt col = colidxs[j];
3669           if (!sbaij || col >= row)
3670             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3671         }
3672       }
3673     }
3674     PetscCall(PetscBTDestroy(&bt));
3675     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3676     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3677     PetscCall(PetscFree(nnz));
3678   }
3679 
3680   /* store matrix values */
3681   for (i = 0; i < m; i++) {
3682     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3683     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3684   }
3685 
3686   PetscCall(PetscFree(rowidxs));
3687   PetscCall(PetscFree2(colidxs, matvals));
3688   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3689   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3690   PetscFunctionReturn(PETSC_SUCCESS);
3691 }
3692 
3693 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3694 {
3695   PetscBool isbinary;
3696 
3697   PetscFunctionBegin;
3698   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3699   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3700   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3701   PetscFunctionReturn(PETSC_SUCCESS);
3702 }
3703 
3704 /*@
3705   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3706   compressed row) format.  For good matrix assembly performance the
3707   user should preallocate the matrix storage by setting the parameter `nz`
3708   (or the array `nnz`).
3709 
3710   Collective
3711 
3712   Input Parameters:
3713 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3714 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3715          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3716 . m    - number of rows
3717 . n    - number of columns
3718 . nz   - number of nonzero blocks  per block row (same for all rows)
3719 - nnz  - array containing the number of nonzero blocks in the various block rows
3720          (possibly different for each block row) or `NULL`
3721 
3722   Output Parameter:
3723 . A - the matrix
3724 
3725   Options Database Keys:
3726 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3727 - -mat_block_size - size of the blocks to use
3728 
3729   Level: intermediate
3730 
3731   Notes:
3732   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3733   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3734   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3735 
3736   The number of rows and columns must be divisible by blocksize.
3737 
3738   If the `nnz` parameter is given then the `nz` parameter is ignored
3739 
3740   A nonzero block is any block that as 1 or more nonzeros in it
3741 
3742   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3743   storage.  That is, the stored row and column indices can begin at
3744   either one (as in Fortran) or zero.
3745 
3746   Specify the preallocated storage with either `nz` or `nnz` (not both).
3747   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3748   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3749   matrices.
3750 
3751 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3752 @*/
3753 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3754 {
3755   PetscFunctionBegin;
3756   PetscCall(MatCreate(comm, A));
3757   PetscCall(MatSetSizes(*A, m, n, m, n));
3758   PetscCall(MatSetType(*A, MATSEQBAIJ));
3759   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3760   PetscFunctionReturn(PETSC_SUCCESS);
3761 }
3762 
3763 /*@
3764   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3765   per row in the matrix. For good matrix assembly performance the
3766   user should preallocate the matrix storage by setting the parameter `nz`
3767   (or the array `nnz`).
3768 
3769   Collective
3770 
3771   Input Parameters:
3772 + B   - the matrix
3773 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3774         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3775 . nz  - number of block nonzeros per block row (same for all rows)
3776 - nnz - array containing the number of block nonzeros in the various block rows
3777         (possibly different for each block row) or `NULL`
3778 
3779   Options Database Keys:
3780 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3781 - -mat_block_size - size of the blocks to use
3782 
3783   Level: intermediate
3784 
3785   Notes:
3786   If the `nnz` parameter is given then the `nz` parameter is ignored
3787 
3788   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3789   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3790   You can also run with the option `-info` and look for messages with the string
3791   malloc in them to see if additional memory allocation was needed.
3792 
3793   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3794   storage.  That is, the stored row and column indices can begin at
3795   either one (as in Fortran) or zero.
3796 
3797   Specify the preallocated storage with either `nz` or `nnz` (not both).
3798   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3799   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3800 
3801 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3802 @*/
3803 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3804 {
3805   PetscFunctionBegin;
3806   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3807   PetscValidType(B, 1);
3808   PetscValidLogicalCollectiveInt(B, bs, 2);
3809   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3810   PetscFunctionReturn(PETSC_SUCCESS);
3811 }
3812 
3813 /*@C
3814   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3815 
3816   Collective
3817 
3818   Input Parameters:
3819 + B  - the matrix
3820 . bs - the blocksize
3821 . i  - the indices into `j` for the start of each local row (indices start with zero)
3822 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3823 - v  - optional values in the matrix, use `NULL` if not provided
3824 
3825   Level: advanced
3826 
3827   Notes:
3828   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3829 
3830   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3831   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3832   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3833   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3834   block column and the second index is over columns within a block.
3835 
3836   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3837 
3838 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3839 @*/
3840 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3841 {
3842   PetscFunctionBegin;
3843   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3844   PetscValidType(B, 1);
3845   PetscValidLogicalCollectiveInt(B, bs, 2);
3846   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3847   PetscFunctionReturn(PETSC_SUCCESS);
3848 }
3849 
3850 /*@
3851   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3852 
3853   Collective
3854 
3855   Input Parameters:
3856 + comm - must be an MPI communicator of size 1
3857 . bs   - size of block
3858 . m    - number of rows
3859 . n    - number of columns
3860 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3861 . j    - column indices
3862 - a    - matrix values
3863 
3864   Output Parameter:
3865 . mat - the matrix
3866 
3867   Level: advanced
3868 
3869   Notes:
3870   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3871   once the matrix is destroyed
3872 
3873   You cannot set new nonzero locations into this matrix, that will generate an error.
3874 
3875   The `i` and `j` indices are 0 based
3876 
3877   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3878 
3879   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3880   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3881   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3882   with column-major ordering within blocks.
3883 
3884 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3885 @*/
3886 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3887 {
3888   Mat_SeqBAIJ *baij;
3889 
3890   PetscFunctionBegin;
3891   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3892   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3893 
3894   PetscCall(MatCreate(comm, mat));
3895   PetscCall(MatSetSizes(*mat, m, n, m, n));
3896   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3897   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3898   baij = (Mat_SeqBAIJ *)(*mat)->data;
3899   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3900 
3901   baij->i = i;
3902   baij->j = j;
3903   baij->a = a;
3904 
3905   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3906   baij->free_a         = PETSC_FALSE;
3907   baij->free_ij        = PETSC_FALSE;
3908   baij->free_imax_ilen = PETSC_TRUE;
3909 
3910   for (PetscInt ii = 0; ii < m; ii++) {
3911     const PetscInt row_len = i[ii + 1] - i[ii];
3912 
3913     baij->ilen[ii] = baij->imax[ii] = row_len;
3914     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3915   }
3916   if (PetscDefined(USE_DEBUG)) {
3917     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3918       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3919       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3920     }
3921   }
3922 
3923   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3924   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3925   PetscFunctionReturn(PETSC_SUCCESS);
3926 }
3927 
3928 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3929 {
3930   PetscFunctionBegin;
3931   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3932   PetscFunctionReturn(PETSC_SUCCESS);
3933 }
3934