xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ca0d62ff0b92f3b3ec10dd93d500c979243edb4e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
12 #define TYPE BAIJ
13 #define TYPE_BS
14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
15 #undef TYPE_BS
16 #define TYPE_BS _BS
17 #define TYPE_BS_ON
18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
19 #undef TYPE_BS
20 #include "../src/mat/impls/aij/seq/seqhashmat.h"
21 #undef TYPE
22 #undef TYPE_BS_ON
23 
24 #if defined(PETSC_HAVE_HYPRE)
25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
26 #endif
27 
28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
30 #endif
31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32 
33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
34 {
35   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
36   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
37   MatScalar   *a_val = a_aij->a;
38 
39   PetscFunctionBegin;
40   PetscCall(MatGetSize(A, &m, &n));
41   PetscCall(PetscArrayzero(reductions, n));
42   if (type == NORM_2) {
43     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
44       for (jb = 0; jb < bs; jb++) {
45         for (ib = 0; ib < bs; ib++) {
46           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
47           a_val++;
48         }
49       }
50     }
51   } else if (type == NORM_1) {
52     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
53       for (jb = 0; jb < bs; jb++) {
54         for (ib = 0; ib < bs; ib++) {
55           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
56           a_val++;
57         }
58       }
59     }
60   } else if (type == NORM_INFINITY) {
61     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
62       for (jb = 0; jb < bs; jb++) {
63         for (ib = 0; ib < bs; ib++) {
64           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
65           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
66           a_val++;
67         }
68       }
69     }
70   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
71     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
72       for (jb = 0; jb < bs; jb++) {
73         for (ib = 0; ib < bs; ib++) {
74           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
75           a_val++;
76         }
77       }
78     }
79   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
81       for (jb = 0; jb < bs; jb++) {
82         for (ib = 0; ib < bs; ib++) {
83           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
84           a_val++;
85         }
86       }
87     }
88   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
89   if (type == NORM_2) {
90     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
91   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
92     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
93   }
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
98 {
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
100   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
101   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
102   PetscReal    shift = 0.0;
103   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
104 
105   PetscFunctionBegin;
106   allowzeropivot = PetscNot(A->erroriffailure);
107 
108   if (a->idiagvalid) {
109     if (values) *values = a->idiag;
110     PetscFunctionReturn(PETSC_SUCCESS);
111   }
112   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
113   diag_offset = a->diag;
114   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
115   diag = a->idiag;
116   if (values) *values = a->idiag;
117   /* factor and invert each block */
118   switch (bs) {
119   case 1:
120     for (i = 0; i < mbs; i++) {
121       odiag   = v + 1 * diag_offset[i];
122       diag[0] = odiag[0];
123 
124       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
125         if (allowzeropivot) {
126           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
128           A->factorerror_zeropivot_row   = i;
129           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
130         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * diag_offset[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * diag_offset[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * diag_offset[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * diag_offset[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * diag_offset[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * diag_offset[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * diag_offset[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 /*
1414      Checks for missing diagonals
1415 */
1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1417 {
1418   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1419   PetscInt    *diag, *ii = a->i, i;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1423   *missing = PETSC_FALSE;
1424   if (A->rmap->n > 0 && !ii) {
1425     *missing = PETSC_TRUE;
1426     if (d) *d = 0;
1427     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1428   } else {
1429     PetscInt n;
1430     n    = PetscMin(a->mbs, a->nbs);
1431     diag = a->diag;
1432     for (i = 0; i < n; i++) {
1433       if (diag[i] >= ii[i + 1]) {
1434         *missing = PETSC_TRUE;
1435         if (d) *d = i;
1436         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1437         break;
1438       }
1439     }
1440   }
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1445 {
1446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1447   PetscInt     i, j, m = a->mbs;
1448 
1449   PetscFunctionBegin;
1450   if (!a->diag) {
1451     PetscCall(PetscMalloc1(m, &a->diag));
1452     a->free_diag = PETSC_TRUE;
1453   }
1454   for (i = 0; i < m; i++) {
1455     a->diag[i] = a->i[i + 1];
1456     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1457       if (a->j[j] == i) {
1458         a->diag[i] = j;
1459         break;
1460       }
1461     }
1462   }
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1467 {
1468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1469   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1470   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1471 
1472   PetscFunctionBegin;
1473   *nn = n;
1474   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1475   if (symmetric) {
1476     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1477     nz = tia[n];
1478   } else {
1479     tia = a->i;
1480     tja = a->j;
1481   }
1482 
1483   if (!blockcompressed && bs > 1) {
1484     (*nn) *= bs;
1485     /* malloc & create the natural set of indices */
1486     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1487     if (n) {
1488       (*ia)[0] = oshift;
1489       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1490     }
1491 
1492     for (i = 1; i < n; i++) {
1493       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1494       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1495     }
1496     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497 
1498     if (inja) {
1499       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1500       cnt = 0;
1501       for (i = 0; i < n; i++) {
1502         for (j = 0; j < bs; j++) {
1503           for (k = tia[i]; k < tia[i + 1]; k++) {
1504             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1505           }
1506         }
1507       }
1508     }
1509 
1510     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1511       PetscCall(PetscFree(tia));
1512       PetscCall(PetscFree(tja));
1513     }
1514   } else if (oshift == 1) {
1515     if (symmetric) {
1516       nz = tia[A->rmap->n / bs];
1517       /*  add 1 to i and j indices */
1518       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1519       *ia = tia;
1520       if (ja) {
1521         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1522         *ja = tja;
1523       }
1524     } else {
1525       nz = a->i[A->rmap->n / bs];
1526       /* malloc space and  add 1 to i and j indices */
1527       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1528       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1529       if (ja) {
1530         PetscCall(PetscMalloc1(nz, ja));
1531         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1532       }
1533     }
1534   } else {
1535     *ia = tia;
1536     if (ja) *ja = tja;
1537   }
1538   PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540 
1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1542 {
1543   PetscFunctionBegin;
1544   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1545   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1546     PetscCall(PetscFree(*ia));
1547     if (ja) PetscCall(PetscFree(*ja));
1548   }
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1555 
1556   PetscFunctionBegin;
1557   if (A->hash_active) {
1558     PetscInt bs;
1559     A->ops[0] = a->cops;
1560     PetscCall(PetscHMapIJVDestroy(&a->ht));
1561     PetscCall(MatGetBlockSize(A, &bs));
1562     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1563     PetscCall(PetscFree(a->dnz));
1564     PetscCall(PetscFree(a->bdnz));
1565     A->hash_active = PETSC_FALSE;
1566   }
1567   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1568   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1569   PetscCall(ISDestroy(&a->row));
1570   PetscCall(ISDestroy(&a->col));
1571   if (a->free_diag) PetscCall(PetscFree(a->diag));
1572   PetscCall(PetscFree(a->idiag));
1573   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1574   PetscCall(PetscFree(a->solve_work));
1575   PetscCall(PetscFree(a->mult_work));
1576   PetscCall(PetscFree(a->sor_workt));
1577   PetscCall(PetscFree(a->sor_work));
1578   PetscCall(ISDestroy(&a->icol));
1579   PetscCall(PetscFree(a->saved_values));
1580   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1581 
1582   PetscCall(MatDestroy(&a->sbaijMat));
1583   PetscCall(MatDestroy(&a->parent));
1584   PetscCall(PetscFree(A->data));
1585 
1586   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1596   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1597   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1598 #if defined(PETSC_HAVE_HYPRE)
1599   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1600 #endif
1601   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1602   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1607 {
1608   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1609 
1610   PetscFunctionBegin;
1611   switch (op) {
1612   case MAT_ROW_ORIENTED:
1613     a->roworiented = flg;
1614     break;
1615   case MAT_KEEP_NONZERO_PATTERN:
1616     a->keepnonzeropattern = flg;
1617     break;
1618   case MAT_NEW_NONZERO_LOCATIONS:
1619     a->nonew = (flg ? 0 : 1);
1620     break;
1621   case MAT_NEW_NONZERO_LOCATION_ERR:
1622     a->nonew = (flg ? -1 : 0);
1623     break;
1624   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1625     a->nonew = (flg ? -2 : 0);
1626     break;
1627   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1628     a->nounused = (flg ? -1 : 0);
1629     break;
1630   case MAT_FORCE_DIAGONAL_ENTRIES:
1631   case MAT_IGNORE_OFF_PROC_ENTRIES:
1632   case MAT_USE_HASH_TABLE:
1633   case MAT_SORTED_FULL:
1634     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1635     break;
1636   case MAT_SPD:
1637   case MAT_SYMMETRIC:
1638   case MAT_STRUCTURALLY_SYMMETRIC:
1639   case MAT_HERMITIAN:
1640   case MAT_SYMMETRY_ETERNAL:
1641   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1642   case MAT_SUBMAT_SINGLEIS:
1643   case MAT_STRUCTURE_ONLY:
1644   case MAT_SPD_ETERNAL:
1645     /* if the diagonal matrix is square it inherits some of the properties above */
1646     break;
1647   default:
1648     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1649   }
1650   PetscFunctionReturn(PETSC_SUCCESS);
1651 }
1652 
1653 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1654 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1655 {
1656   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1657   MatScalar   *aa_i;
1658   PetscScalar *v_i;
1659 
1660   PetscFunctionBegin;
1661   bs  = A->rmap->bs;
1662   bs2 = bs * bs;
1663   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1664 
1665   bn  = row / bs; /* Block number */
1666   bp  = row % bs; /* Block Position */
1667   M   = ai[bn + 1] - ai[bn];
1668   *nz = bs * M;
1669 
1670   if (v) {
1671     *v = NULL;
1672     if (*nz) {
1673       PetscCall(PetscMalloc1(*nz, v));
1674       for (i = 0; i < M; i++) { /* for each block in the block row */
1675         v_i  = *v + i * bs;
1676         aa_i = aa + bs2 * (ai[bn] + i);
1677         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1678       }
1679     }
1680   }
1681 
1682   if (idx) {
1683     *idx = NULL;
1684     if (*nz) {
1685       PetscCall(PetscMalloc1(*nz, idx));
1686       for (i = 0; i < M; i++) { /* for each block in the block row */
1687         idx_i = *idx + i * bs;
1688         itmp  = bs * aj[ai[bn] + i];
1689         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1690       }
1691     }
1692   }
1693   PetscFunctionReturn(PETSC_SUCCESS);
1694 }
1695 
1696 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1697 {
1698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1699 
1700   PetscFunctionBegin;
1701   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1706 {
1707   PetscFunctionBegin;
1708   if (idx) PetscCall(PetscFree(*idx));
1709   if (v) PetscCall(PetscFree(*v));
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1714 {
1715   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1716   Mat          C;
1717   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1718   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1719   MatScalar   *ata, *aa = a->a;
1720 
1721   PetscFunctionBegin;
1722   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1723   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1724   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1725     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1726 
1727     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1728     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1729     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1730     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1731 
1732     at  = (Mat_SeqBAIJ *)C->data;
1733     ati = at->i;
1734     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1735   } else {
1736     C   = *B;
1737     at  = (Mat_SeqBAIJ *)C->data;
1738     ati = at->i;
1739   }
1740 
1741   atj = at->j;
1742   ata = at->a;
1743 
1744   /* Copy ati into atfill so we have locations of the next free space in atj */
1745   PetscCall(PetscArraycpy(atfill, ati, nbs));
1746 
1747   /* Walk through A row-wise and mark nonzero entries of A^T. */
1748   for (i = 0; i < mbs; i++) {
1749     anzj = ai[i + 1] - ai[i];
1750     for (j = 0; j < anzj; j++) {
1751       atj[atfill[*aj]] = i;
1752       for (kr = 0; kr < bs; kr++) {
1753         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1754       }
1755       atfill[*aj++] += 1;
1756     }
1757   }
1758   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1759   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1760 
1761   /* Clean up temporary space and complete requests. */
1762   PetscCall(PetscFree(atfill));
1763 
1764   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1765     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1766     *B = C;
1767   } else {
1768     PetscCall(MatHeaderMerge(A, &C));
1769   }
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1774 {
1775   Mat Btrans;
1776 
1777   PetscFunctionBegin;
1778   *f = PETSC_FALSE;
1779   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1780   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1781   PetscCall(MatDestroy(&Btrans));
1782   PetscFunctionReturn(PETSC_SUCCESS);
1783 }
1784 
1785 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1786 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1787 {
1788   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1789   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1790   PetscInt    *rowlens, *colidxs;
1791   PetscScalar *matvals;
1792 
1793   PetscFunctionBegin;
1794   PetscCall(PetscViewerSetUp(viewer));
1795 
1796   M  = mat->rmap->N;
1797   N  = mat->cmap->N;
1798   m  = mat->rmap->n;
1799   bs = mat->rmap->bs;
1800   nz = bs * bs * A->nz;
1801 
1802   /* write matrix header */
1803   header[0] = MAT_FILE_CLASSID;
1804   header[1] = M;
1805   header[2] = N;
1806   header[3] = nz;
1807   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1808 
1809   /* store row lengths */
1810   PetscCall(PetscMalloc1(m, &rowlens));
1811   for (cnt = 0, i = 0; i < A->mbs; i++)
1812     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1813   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1814   PetscCall(PetscFree(rowlens));
1815 
1816   /* store column indices  */
1817   PetscCall(PetscMalloc1(nz, &colidxs));
1818   for (cnt = 0, i = 0; i < A->mbs; i++)
1819     for (k = 0; k < bs; k++)
1820       for (j = A->i[i]; j < A->i[i + 1]; j++)
1821         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1822   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1823   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1824   PetscCall(PetscFree(colidxs));
1825 
1826   /* store nonzero values */
1827   PetscCall(PetscMalloc1(nz, &matvals));
1828   for (cnt = 0, i = 0; i < A->mbs; i++)
1829     for (k = 0; k < bs; k++)
1830       for (j = A->i[i]; j < A->i[i + 1]; j++)
1831         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1832   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1833   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1834   PetscCall(PetscFree(matvals));
1835 
1836   /* write block size option to the viewer's .info file */
1837   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1838   PetscFunctionReturn(PETSC_SUCCESS);
1839 }
1840 
1841 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1842 {
1843   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1844   PetscInt     i, bs = A->rmap->bs, k;
1845 
1846   PetscFunctionBegin;
1847   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1848   for (i = 0; i < a->mbs; i++) {
1849     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1850     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1851     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1852   }
1853   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1854   PetscFunctionReturn(PETSC_SUCCESS);
1855 }
1856 
1857 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1858 {
1859   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1860   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1861   PetscViewerFormat format;
1862 
1863   PetscFunctionBegin;
1864   if (A->structure_only) {
1865     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1866     PetscFunctionReturn(PETSC_SUCCESS);
1867   }
1868 
1869   PetscCall(PetscViewerGetFormat(viewer, &format));
1870   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1871     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1872   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1873     const char *matname;
1874     Mat         aij;
1875     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1876     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1877     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1878     PetscCall(MatView(aij, viewer));
1879     PetscCall(MatDestroy(&aij));
1880   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1881     PetscFunctionReturn(PETSC_SUCCESS);
1882   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1883     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1884     for (i = 0; i < a->mbs; i++) {
1885       for (j = 0; j < bs; j++) {
1886         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1887         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1888           for (l = 0; l < bs; l++) {
1889 #if defined(PETSC_USE_COMPLEX)
1890             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1891               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1892             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1893               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1894             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1895               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1896             }
1897 #else
1898             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1899 #endif
1900           }
1901         }
1902         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1903       }
1904     }
1905     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1906   } else {
1907     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1908     for (i = 0; i < a->mbs; i++) {
1909       for (j = 0; j < bs; j++) {
1910         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1911         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1912           for (l = 0; l < bs; l++) {
1913 #if defined(PETSC_USE_COMPLEX)
1914             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1915               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1916             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1917               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1918             } else {
1919               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1920             }
1921 #else
1922             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1923 #endif
1924           }
1925         }
1926         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1927       }
1928     }
1929     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1930   }
1931   PetscCall(PetscViewerFlush(viewer));
1932   PetscFunctionReturn(PETSC_SUCCESS);
1933 }
1934 
1935 #include <petscdraw.h>
1936 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1937 {
1938   Mat               A = (Mat)Aa;
1939   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1940   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1941   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1942   MatScalar        *aa;
1943   PetscViewer       viewer;
1944   PetscViewerFormat format;
1945 
1946   PetscFunctionBegin;
1947   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1948   PetscCall(PetscViewerGetFormat(viewer, &format));
1949   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1950 
1951   /* loop over matrix elements drawing boxes */
1952 
1953   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1954     PetscDrawCollectiveBegin(draw);
1955     /* Blue for negative, Cyan for zero and  Red for positive */
1956     color = PETSC_DRAW_BLUE;
1957     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1958       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1959         y_l = A->rmap->N - row - 1.0;
1960         y_r = y_l + 1.0;
1961         x_l = a->j[j] * bs;
1962         x_r = x_l + 1.0;
1963         aa  = a->a + j * bs2;
1964         for (k = 0; k < bs; k++) {
1965           for (l = 0; l < bs; l++) {
1966             if (PetscRealPart(*aa++) >= 0.) continue;
1967             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1968           }
1969         }
1970       }
1971     }
1972     color = PETSC_DRAW_CYAN;
1973     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1974       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1975         y_l = A->rmap->N - row - 1.0;
1976         y_r = y_l + 1.0;
1977         x_l = a->j[j] * bs;
1978         x_r = x_l + 1.0;
1979         aa  = a->a + j * bs2;
1980         for (k = 0; k < bs; k++) {
1981           for (l = 0; l < bs; l++) {
1982             if (PetscRealPart(*aa++) != 0.) continue;
1983             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1984           }
1985         }
1986       }
1987     }
1988     color = PETSC_DRAW_RED;
1989     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1990       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1991         y_l = A->rmap->N - row - 1.0;
1992         y_r = y_l + 1.0;
1993         x_l = a->j[j] * bs;
1994         x_r = x_l + 1.0;
1995         aa  = a->a + j * bs2;
1996         for (k = 0; k < bs; k++) {
1997           for (l = 0; l < bs; l++) {
1998             if (PetscRealPart(*aa++) <= 0.) continue;
1999             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2000           }
2001         }
2002       }
2003     }
2004     PetscDrawCollectiveEnd(draw);
2005   } else {
2006     /* use contour shading to indicate magnitude of values */
2007     /* first determine max of all nonzero values */
2008     PetscReal minv = 0.0, maxv = 0.0;
2009     PetscDraw popup;
2010 
2011     for (i = 0; i < a->nz * a->bs2; i++) {
2012       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2013     }
2014     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2015     PetscCall(PetscDrawGetPopup(draw, &popup));
2016     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2017 
2018     PetscDrawCollectiveBegin(draw);
2019     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2020       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2021         y_l = A->rmap->N - row - 1.0;
2022         y_r = y_l + 1.0;
2023         x_l = a->j[j] * bs;
2024         x_r = x_l + 1.0;
2025         aa  = a->a + j * bs2;
2026         for (k = 0; k < bs; k++) {
2027           for (l = 0; l < bs; l++) {
2028             MatScalar v = *aa++;
2029             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2030             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2031           }
2032         }
2033       }
2034     }
2035     PetscDrawCollectiveEnd(draw);
2036   }
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2041 {
2042   PetscReal xl, yl, xr, yr, w, h;
2043   PetscDraw draw;
2044   PetscBool isnull;
2045 
2046   PetscFunctionBegin;
2047   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2048   PetscCall(PetscDrawIsNull(draw, &isnull));
2049   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2050 
2051   xr = A->cmap->n;
2052   yr = A->rmap->N;
2053   h  = yr / 10.0;
2054   w  = xr / 10.0;
2055   xr += w;
2056   yr += h;
2057   xl = -w;
2058   yl = -h;
2059   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2060   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2061   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2062   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2063   PetscCall(PetscDrawSave(draw));
2064   PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066 
2067 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2068 {
2069   PetscBool iascii, isbinary, isdraw;
2070 
2071   PetscFunctionBegin;
2072   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2073   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2074   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2075   if (iascii) {
2076     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2077   } else if (isbinary) {
2078     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2079   } else if (isdraw) {
2080     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2081   } else {
2082     Mat B;
2083     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2084     PetscCall(MatView(B, viewer));
2085     PetscCall(MatDestroy(&B));
2086   }
2087   PetscFunctionReturn(PETSC_SUCCESS);
2088 }
2089 
2090 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2091 {
2092   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2093   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2094   PetscInt    *ai = a->i, *ailen = a->ilen;
2095   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2096   MatScalar   *ap, *aa = a->a;
2097 
2098   PetscFunctionBegin;
2099   for (k = 0; k < m; k++) { /* loop over rows */
2100     row  = im[k];
2101     brow = row / bs;
2102     if (row < 0) {
2103       v += n;
2104       continue;
2105     } /* negative row */
2106     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2107     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2108     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2109     nrow = ailen[brow];
2110     for (l = 0; l < n; l++) { /* loop over columns */
2111       if (in[l] < 0) {
2112         v++;
2113         continue;
2114       } /* negative column */
2115       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2116       col  = in[l];
2117       bcol = col / bs;
2118       cidx = col % bs;
2119       ridx = row % bs;
2120       high = nrow;
2121       low  = 0; /* assume unsorted */
2122       while (high - low > 5) {
2123         t = (low + high) / 2;
2124         if (rp[t] > bcol) high = t;
2125         else low = t;
2126       }
2127       for (i = low; i < high; i++) {
2128         if (rp[i] > bcol) break;
2129         if (rp[i] == bcol) {
2130           *v++ = ap[bs2 * i + bs * cidx + ridx];
2131           goto finished;
2132         }
2133       }
2134       *v++ = 0.0;
2135     finished:;
2136     }
2137   }
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2142 {
2143   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2144   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2145   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2146   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2147   PetscBool          roworiented = a->roworiented;
2148   const PetscScalar *value       = v;
2149   MatScalar         *ap = NULL, *aa = a->a, *bap;
2150 
2151   PetscFunctionBegin;
2152   if (roworiented) {
2153     stepval = (n - 1) * bs;
2154   } else {
2155     stepval = (m - 1) * bs;
2156   }
2157   for (k = 0; k < m; k++) { /* loop over added rows */
2158     row = im[k];
2159     if (row < 0) continue;
2160     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2161     rp = aj + ai[row];
2162     if (!A->structure_only) ap = aa + bs2 * ai[row];
2163     rmax = imax[row];
2164     nrow = ailen[row];
2165     low  = 0;
2166     high = nrow;
2167     for (l = 0; l < n; l++) { /* loop over added columns */
2168       if (in[l] < 0) continue;
2169       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2170       col = in[l];
2171       if (!A->structure_only) {
2172         if (roworiented) {
2173           value = v + (k * (stepval + bs) + l) * bs;
2174         } else {
2175           value = v + (l * (stepval + bs) + k) * bs;
2176         }
2177       }
2178       if (col <= lastcol) low = 0;
2179       else high = nrow;
2180       lastcol = col;
2181       while (high - low > 7) {
2182         t = (low + high) / 2;
2183         if (rp[t] > col) high = t;
2184         else low = t;
2185       }
2186       for (i = low; i < high; i++) {
2187         if (rp[i] > col) break;
2188         if (rp[i] == col) {
2189           if (A->structure_only) goto noinsert2;
2190           bap = ap + bs2 * i;
2191           if (roworiented) {
2192             if (is == ADD_VALUES) {
2193               for (ii = 0; ii < bs; ii++, value += stepval) {
2194                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2195               }
2196             } else {
2197               for (ii = 0; ii < bs; ii++, value += stepval) {
2198                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199               }
2200             }
2201           } else {
2202             if (is == ADD_VALUES) {
2203               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2204                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2205                 bap += bs;
2206               }
2207             } else {
2208               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2209                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2210                 bap += bs;
2211               }
2212             }
2213           }
2214           goto noinsert2;
2215         }
2216       }
2217       if (nonew == 1) goto noinsert2;
2218       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2219       if (A->structure_only) {
2220         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2221       } else {
2222         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2223       }
2224       N = nrow++ - 1;
2225       high++;
2226       /* shift up all the later entries in this row */
2227       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2228       rp[i] = col;
2229       if (!A->structure_only) {
2230         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2231         bap = ap + bs2 * i;
2232         if (roworiented) {
2233           for (ii = 0; ii < bs; ii++, value += stepval) {
2234             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2235           }
2236         } else {
2237           for (ii = 0; ii < bs; ii++, value += stepval) {
2238             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2239           }
2240         }
2241       }
2242     noinsert2:;
2243       low = i;
2244     }
2245     ailen[row] = nrow;
2246   }
2247   PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249 
2250 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2251 {
2252   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2253   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2254   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2255   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2256   MatScalar   *aa    = a->a, *ap;
2257   PetscReal    ratio = 0.6;
2258 
2259   PetscFunctionBegin;
2260   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2261 
2262   if (m) rmax = ailen[0];
2263   for (i = 1; i < mbs; i++) {
2264     /* move each row back by the amount of empty slots (fshift) before it*/
2265     fshift += imax[i - 1] - ailen[i - 1];
2266     rmax = PetscMax(rmax, ailen[i]);
2267     if (fshift) {
2268       ip = aj + ai[i];
2269       ap = aa + bs2 * ai[i];
2270       N  = ailen[i];
2271       PetscCall(PetscArraymove(ip - fshift, ip, N));
2272       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2273     }
2274     ai[i] = ai[i - 1] + ailen[i - 1];
2275   }
2276   if (mbs) {
2277     fshift += imax[mbs - 1] - ailen[mbs - 1];
2278     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2279   }
2280 
2281   /* reset ilen and imax for each row */
2282   a->nonzerorowcnt = 0;
2283   if (A->structure_only) {
2284     PetscCall(PetscFree2(a->imax, a->ilen));
2285   } else { /* !A->structure_only */
2286     for (i = 0; i < mbs; i++) {
2287       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2288       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2289     }
2290   }
2291   a->nz = ai[mbs];
2292 
2293   /* diagonals may have moved, so kill the diagonal pointers */
2294   a->idiagvalid = PETSC_FALSE;
2295   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2296   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2297   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2298   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2299   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2300 
2301   A->info.mallocs += a->reallocs;
2302   a->reallocs         = 0;
2303   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2304   a->rmax             = rmax;
2305 
2306   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2307   PetscFunctionReturn(PETSC_SUCCESS);
2308 }
2309 
2310 /*
2311    This function returns an array of flags which indicate the locations of contiguous
2312    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2313    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2314    Assume: sizes should be long enough to hold all the values.
2315 */
2316 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2317 {
2318   PetscInt j = 0;
2319 
2320   PetscFunctionBegin;
2321   for (PetscInt i = 0; i < n; j++) {
2322     PetscInt row = idx[i];
2323     if (row % bs != 0) { /* Not the beginning of a block */
2324       sizes[j] = 1;
2325       i++;
2326     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2327       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2328       i++;
2329     } else { /* Beginning of the block, so check if the complete block exists */
2330       PetscBool flg = PETSC_TRUE;
2331       for (PetscInt k = 1; k < bs; k++) {
2332         if (row + k != idx[i + k]) { /* break in the block */
2333           flg = PETSC_FALSE;
2334           break;
2335         }
2336       }
2337       if (flg) { /* No break in the bs */
2338         sizes[j] = bs;
2339         i += bs;
2340       } else {
2341         sizes[j] = 1;
2342         i++;
2343       }
2344     }
2345   }
2346   *bs_max = j;
2347   PetscFunctionReturn(PETSC_SUCCESS);
2348 }
2349 
2350 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2351 {
2352   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2353   PetscInt           i, j, k, count, *rows;
2354   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2355   PetscScalar        zero = 0.0;
2356   MatScalar         *aa;
2357   const PetscScalar *xx;
2358   PetscScalar       *bb;
2359 
2360   PetscFunctionBegin;
2361   /* fix right hand side if needed */
2362   if (x && b) {
2363     PetscCall(VecGetArrayRead(x, &xx));
2364     PetscCall(VecGetArray(b, &bb));
2365     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2366     PetscCall(VecRestoreArrayRead(x, &xx));
2367     PetscCall(VecRestoreArray(b, &bb));
2368   }
2369 
2370   /* Make a copy of the IS and  sort it */
2371   /* allocate memory for rows,sizes */
2372   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2373 
2374   /* copy IS values to rows, and sort them */
2375   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2376   PetscCall(PetscSortInt(is_n, rows));
2377 
2378   if (baij->keepnonzeropattern) {
2379     for (i = 0; i < is_n; i++) sizes[i] = 1;
2380     bs_max = is_n;
2381   } else {
2382     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2383     A->nonzerostate++;
2384   }
2385 
2386   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2387     row = rows[j];
2388     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2389     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2390     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2391     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2392       if (diag != (PetscScalar)0.0) {
2393         if (baij->ilen[row / bs] > 0) {
2394           baij->ilen[row / bs]       = 1;
2395           baij->j[baij->i[row / bs]] = row / bs;
2396 
2397           PetscCall(PetscArrayzero(aa, count * bs));
2398         }
2399         /* Now insert all the diagonal values for this bs */
2400         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2401       } else { /* (diag == 0.0) */
2402         baij->ilen[row / bs] = 0;
2403       }      /* end (diag == 0.0) */
2404     } else { /* (sizes[i] != bs) */
2405       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2406       for (k = 0; k < count; k++) {
2407         aa[0] = zero;
2408         aa += bs;
2409       }
2410       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2411     }
2412   }
2413 
2414   PetscCall(PetscFree2(rows, sizes));
2415   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
2419 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2420 {
2421   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2422   PetscInt           i, j, k, count;
2423   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2424   PetscScalar        zero = 0.0;
2425   MatScalar         *aa;
2426   const PetscScalar *xx;
2427   PetscScalar       *bb;
2428   PetscBool         *zeroed, vecs = PETSC_FALSE;
2429 
2430   PetscFunctionBegin;
2431   /* fix right hand side if needed */
2432   if (x && b) {
2433     PetscCall(VecGetArrayRead(x, &xx));
2434     PetscCall(VecGetArray(b, &bb));
2435     vecs = PETSC_TRUE;
2436   }
2437 
2438   /* zero the columns */
2439   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2440   for (i = 0; i < is_n; i++) {
2441     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2442     zeroed[is_idx[i]] = PETSC_TRUE;
2443   }
2444   for (i = 0; i < A->rmap->N; i++) {
2445     if (!zeroed[i]) {
2446       row = i / bs;
2447       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2448         for (k = 0; k < bs; k++) {
2449           col = bs * baij->j[j] + k;
2450           if (zeroed[col]) {
2451             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2452             if (vecs) bb[i] -= aa[0] * xx[col];
2453             aa[0] = 0.0;
2454           }
2455         }
2456       }
2457     } else if (vecs) bb[i] = diag * xx[i];
2458   }
2459   PetscCall(PetscFree(zeroed));
2460   if (vecs) {
2461     PetscCall(VecRestoreArrayRead(x, &xx));
2462     PetscCall(VecRestoreArray(b, &bb));
2463   }
2464 
2465   /* zero the rows */
2466   for (i = 0; i < is_n; i++) {
2467     row   = is_idx[i];
2468     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2469     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2470     for (k = 0; k < count; k++) {
2471       aa[0] = zero;
2472       aa += bs;
2473     }
2474     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2475   }
2476   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2477   PetscFunctionReturn(PETSC_SUCCESS);
2478 }
2479 
2480 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2481 {
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2483   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2484   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2485   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2486   PetscInt     ridx, cidx, bs2                 = a->bs2;
2487   PetscBool    roworiented = a->roworiented;
2488   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2489 
2490   PetscFunctionBegin;
2491   for (k = 0; k < m; k++) { /* loop over added rows */
2492     row  = im[k];
2493     brow = row / bs;
2494     if (row < 0) continue;
2495     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2496     rp = aj + ai[brow];
2497     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2498     rmax = imax[brow];
2499     nrow = ailen[brow];
2500     low  = 0;
2501     high = nrow;
2502     for (l = 0; l < n; l++) { /* loop over added columns */
2503       if (in[l] < 0) continue;
2504       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2505       col  = in[l];
2506       bcol = col / bs;
2507       ridx = row % bs;
2508       cidx = col % bs;
2509       if (!A->structure_only) {
2510         if (roworiented) {
2511           value = v[l + k * n];
2512         } else {
2513           value = v[k + l * m];
2514         }
2515       }
2516       if (col <= lastcol) low = 0;
2517       else high = nrow;
2518       lastcol = col;
2519       while (high - low > 7) {
2520         t = (low + high) / 2;
2521         if (rp[t] > bcol) high = t;
2522         else low = t;
2523       }
2524       for (i = low; i < high; i++) {
2525         if (rp[i] > bcol) break;
2526         if (rp[i] == bcol) {
2527           bap = ap + bs2 * i + bs * cidx + ridx;
2528           if (!A->structure_only) {
2529             if (is == ADD_VALUES) *bap += value;
2530             else *bap = value;
2531           }
2532           goto noinsert1;
2533         }
2534       }
2535       if (nonew == 1) goto noinsert1;
2536       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2537       if (A->structure_only) {
2538         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2539       } else {
2540         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2541       }
2542       N = nrow++ - 1;
2543       high++;
2544       /* shift up all the later entries in this row */
2545       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2546       rp[i] = bcol;
2547       if (!A->structure_only) {
2548         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2549         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2550         ap[bs2 * i + bs * cidx + ridx] = value;
2551       }
2552       a->nz++;
2553       A->nonzerostate++;
2554     noinsert1:;
2555       low = i;
2556     }
2557     ailen[brow] = nrow;
2558   }
2559   PetscFunctionReturn(PETSC_SUCCESS);
2560 }
2561 
2562 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2563 {
2564   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2565   Mat          outA;
2566   PetscBool    row_identity, col_identity;
2567 
2568   PetscFunctionBegin;
2569   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2570   PetscCall(ISIdentity(row, &row_identity));
2571   PetscCall(ISIdentity(col, &col_identity));
2572   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2573 
2574   outA            = inA;
2575   inA->factortype = MAT_FACTOR_LU;
2576   PetscCall(PetscFree(inA->solvertype));
2577   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2578 
2579   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2580 
2581   PetscCall(PetscObjectReference((PetscObject)row));
2582   PetscCall(ISDestroy(&a->row));
2583   a->row = row;
2584   PetscCall(PetscObjectReference((PetscObject)col));
2585   PetscCall(ISDestroy(&a->col));
2586   a->col = col;
2587 
2588   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2589   PetscCall(ISDestroy(&a->icol));
2590   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2591 
2592   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2593   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2594   PetscCall(MatLUFactorNumeric(outA, inA, info));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2599 {
2600   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2601 
2602   PetscFunctionBegin;
2603   baij->nz = baij->maxnz;
2604   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2605   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2606   PetscFunctionReturn(PETSC_SUCCESS);
2607 }
2608 
2609 /*@
2610   MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2611 
2612   Input Parameters:
2613 + mat     - the `MATSEQBAIJ` matrix
2614 - indices - the column indices
2615 
2616   Level: advanced
2617 
2618   Notes:
2619   This can be called if you have precomputed the nonzero structure of the
2620   matrix and want to provide it to the matrix object to improve the performance
2621   of the `MatSetValues()` operation.
2622 
2623   You MUST have set the correct numbers of nonzeros per row in the call to
2624   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2625 
2626   MUST be called before any calls to `MatSetValues()`
2627 
2628 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2629 @*/
2630 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2631 {
2632   PetscFunctionBegin;
2633   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2634   PetscAssertPointer(indices, 2);
2635   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2640 {
2641   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2642   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2643   PetscReal    atmp;
2644   PetscScalar *x, zero = 0.0;
2645   MatScalar   *aa;
2646   PetscInt     ncols, brow, krow, kcol;
2647 
2648   PetscFunctionBegin;
2649   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2650   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2651   bs  = A->rmap->bs;
2652   aa  = a->a;
2653   ai  = a->i;
2654   aj  = a->j;
2655   mbs = a->mbs;
2656 
2657   PetscCall(VecSet(v, zero));
2658   PetscCall(VecGetArray(v, &x));
2659   PetscCall(VecGetLocalSize(v, &n));
2660   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2661   for (i = 0; i < mbs; i++) {
2662     ncols = ai[1] - ai[0];
2663     ai++;
2664     brow = bs * i;
2665     for (j = 0; j < ncols; j++) {
2666       for (kcol = 0; kcol < bs; kcol++) {
2667         for (krow = 0; krow < bs; krow++) {
2668           atmp = PetscAbsScalar(*aa);
2669           aa++;
2670           row = brow + krow; /* row index */
2671           if (PetscAbsScalar(x[row]) < atmp) {
2672             x[row] = atmp;
2673             if (idx) idx[row] = bs * (*aj) + kcol;
2674           }
2675         }
2676       }
2677       aj++;
2678     }
2679   }
2680   PetscCall(VecRestoreArray(v, &x));
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
2684 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2685 {
2686   PetscFunctionBegin;
2687   /* If the two matrices have the same copy implementation, use fast copy. */
2688   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2689     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2690     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2691     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2692 
2693     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2694     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2695     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2696     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2697   } else {
2698     PetscCall(MatCopy_Basic(A, B, str));
2699   }
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2704 {
2705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   *array = a->a;
2709   PetscFunctionReturn(PETSC_SUCCESS);
2710 }
2711 
2712 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2713 {
2714   PetscFunctionBegin;
2715   *array = NULL;
2716   PetscFunctionReturn(PETSC_SUCCESS);
2717 }
2718 
2719 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2720 {
2721   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2722   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2723   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2724 
2725   PetscFunctionBegin;
2726   /* Set the number of nonzeros in the new matrix */
2727   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2728   PetscFunctionReturn(PETSC_SUCCESS);
2729 }
2730 
2731 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2732 {
2733   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2734   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2735   PetscBLASInt one = 1;
2736 
2737   PetscFunctionBegin;
2738   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2739     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2740     if (e) {
2741       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2742       if (e) {
2743         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2744         if (e) str = SAME_NONZERO_PATTERN;
2745       }
2746     }
2747     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2748   }
2749   if (str == SAME_NONZERO_PATTERN) {
2750     PetscScalar  alpha = a;
2751     PetscBLASInt bnz;
2752     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2753     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2754     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2755   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2756     PetscCall(MatAXPY_Basic(Y, a, X, str));
2757   } else {
2758     Mat       B;
2759     PetscInt *nnz;
2760     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2761     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2762     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2763     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2764     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2765     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2766     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2767     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2768     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2769     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2770     PetscCall(MatHeaderMerge(Y, &B));
2771     PetscCall(PetscFree(nnz));
2772   }
2773   PetscFunctionReturn(PETSC_SUCCESS);
2774 }
2775 
2776 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2777 {
2778 #if PetscDefined(USE_COMPLEX)
2779   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2780   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2781   MatScalar   *aa = a->a;
2782 
2783   PetscFunctionBegin;
2784   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2785   PetscFunctionReturn(PETSC_SUCCESS);
2786 #else
2787   (void)A;
2788   return PETSC_SUCCESS;
2789 #endif
2790 }
2791 
2792 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2793 {
2794 #if PetscDefined(USE_COMPLEX)
2795   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2796   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2797   MatScalar   *aa = a->a;
2798 
2799   PetscFunctionBegin;
2800   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2801   PetscFunctionReturn(PETSC_SUCCESS);
2802 #else
2803   (void)A;
2804   return PETSC_SUCCESS;
2805 #endif
2806 }
2807 
2808 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2809 {
2810 #if PetscDefined(USE_COMPLEX)
2811   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2812   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2813   MatScalar   *aa = a->a;
2814 
2815   PetscFunctionBegin;
2816   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2817   PetscFunctionReturn(PETSC_SUCCESS);
2818 #else
2819   (void)A;
2820   return PETSC_SUCCESS;
2821 #endif
2822 }
2823 
2824 /*
2825     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2826 */
2827 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2828 {
2829   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2830   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2831   PetscInt     nz = a->i[m], row, *jj, mr, col;
2832 
2833   PetscFunctionBegin;
2834   *nn = n;
2835   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2836   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2837   PetscCall(PetscCalloc1(n, &collengths));
2838   PetscCall(PetscMalloc1(n + 1, &cia));
2839   PetscCall(PetscMalloc1(nz, &cja));
2840   jj = a->j;
2841   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2842   cia[0] = oshift;
2843   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2844   PetscCall(PetscArrayzero(collengths, n));
2845   jj = a->j;
2846   for (row = 0; row < m; row++) {
2847     mr = a->i[row + 1] - a->i[row];
2848     for (i = 0; i < mr; i++) {
2849       col = *jj++;
2850 
2851       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2852     }
2853   }
2854   PetscCall(PetscFree(collengths));
2855   *ia = cia;
2856   *ja = cja;
2857   PetscFunctionReturn(PETSC_SUCCESS);
2858 }
2859 
2860 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2861 {
2862   PetscFunctionBegin;
2863   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2864   PetscCall(PetscFree(*ia));
2865   PetscCall(PetscFree(*ja));
2866   PetscFunctionReturn(PETSC_SUCCESS);
2867 }
2868 
2869 /*
2870  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2871  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2872  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2873  */
2874 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2875 {
2876   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2877   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2878   PetscInt     nz = a->i[m], row, *jj, mr, col;
2879   PetscInt    *cspidx;
2880 
2881   PetscFunctionBegin;
2882   *nn = n;
2883   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2884 
2885   PetscCall(PetscCalloc1(n, &collengths));
2886   PetscCall(PetscMalloc1(n + 1, &cia));
2887   PetscCall(PetscMalloc1(nz, &cja));
2888   PetscCall(PetscMalloc1(nz, &cspidx));
2889   jj = a->j;
2890   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2891   cia[0] = oshift;
2892   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2893   PetscCall(PetscArrayzero(collengths, n));
2894   jj = a->j;
2895   for (row = 0; row < m; row++) {
2896     mr = a->i[row + 1] - a->i[row];
2897     for (i = 0; i < mr; i++) {
2898       col                                         = *jj++;
2899       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2900       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2901     }
2902   }
2903   PetscCall(PetscFree(collengths));
2904   *ia    = cia;
2905   *ja    = cja;
2906   *spidx = cspidx;
2907   PetscFunctionReturn(PETSC_SUCCESS);
2908 }
2909 
2910 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2911 {
2912   PetscFunctionBegin;
2913   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2914   PetscCall(PetscFree(*spidx));
2915   PetscFunctionReturn(PETSC_SUCCESS);
2916 }
2917 
2918 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2919 {
2920   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2921 
2922   PetscFunctionBegin;
2923   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2924   PetscCall(MatShift_Basic(Y, a));
2925   PetscFunctionReturn(PETSC_SUCCESS);
2926 }
2927 
2928 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2929 {
2930   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2931   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2932   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2933   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2934   MatScalar   *aa = a->a, *ap;
2935   PetscBool    zero;
2936 
2937   PetscFunctionBegin;
2938   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2939   if (m) rmax = ailen[0];
2940   for (i = 1; i <= mbs; i++) {
2941     for (k = ai[i - 1]; k < ai[i]; k++) {
2942       zero = PETSC_TRUE;
2943       ap   = aa + bs2 * k;
2944       for (j = 0; j < bs2 && zero; j++) {
2945         if (ap[j] != 0.0) zero = PETSC_FALSE;
2946       }
2947       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2948       else {
2949         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2950         aj[k - fshift] = aj[k];
2951         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2952       }
2953     }
2954     ai[i - 1] -= fshift_prev;
2955     fshift_prev  = fshift;
2956     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2957     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2958     rmax = PetscMax(rmax, ailen[i - 1]);
2959   }
2960   if (fshift) {
2961     if (mbs) {
2962       ai[mbs] -= fshift;
2963       a->nz = ai[mbs];
2964     }
2965     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2966     A->nonzerostate++;
2967     A->info.nz_unneeded += (PetscReal)fshift;
2968     a->rmax = rmax;
2969     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2970     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2971   }
2972   PetscFunctionReturn(PETSC_SUCCESS);
2973 }
2974 
2975 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2976                                        MatGetRow_SeqBAIJ,
2977                                        MatRestoreRow_SeqBAIJ,
2978                                        MatMult_SeqBAIJ_N,
2979                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2980                                        MatMultTranspose_SeqBAIJ,
2981                                        MatMultTransposeAdd_SeqBAIJ,
2982                                        NULL,
2983                                        NULL,
2984                                        NULL,
2985                                        /* 10*/ NULL,
2986                                        MatLUFactor_SeqBAIJ,
2987                                        NULL,
2988                                        NULL,
2989                                        MatTranspose_SeqBAIJ,
2990                                        /* 15*/ MatGetInfo_SeqBAIJ,
2991                                        MatEqual_SeqBAIJ,
2992                                        MatGetDiagonal_SeqBAIJ,
2993                                        MatDiagonalScale_SeqBAIJ,
2994                                        MatNorm_SeqBAIJ,
2995                                        /* 20*/ NULL,
2996                                        MatAssemblyEnd_SeqBAIJ,
2997                                        MatSetOption_SeqBAIJ,
2998                                        MatZeroEntries_SeqBAIJ,
2999                                        /* 24*/ MatZeroRows_SeqBAIJ,
3000                                        NULL,
3001                                        NULL,
3002                                        NULL,
3003                                        NULL,
3004                                        /* 29*/ MatSetUp_Seq_Hash,
3005                                        NULL,
3006                                        NULL,
3007                                        NULL,
3008                                        NULL,
3009                                        /* 34*/ MatDuplicate_SeqBAIJ,
3010                                        NULL,
3011                                        NULL,
3012                                        MatILUFactor_SeqBAIJ,
3013                                        NULL,
3014                                        /* 39*/ MatAXPY_SeqBAIJ,
3015                                        MatCreateSubMatrices_SeqBAIJ,
3016                                        MatIncreaseOverlap_SeqBAIJ,
3017                                        MatGetValues_SeqBAIJ,
3018                                        MatCopy_SeqBAIJ,
3019                                        /* 44*/ NULL,
3020                                        MatScale_SeqBAIJ,
3021                                        MatShift_SeqBAIJ,
3022                                        NULL,
3023                                        MatZeroRowsColumns_SeqBAIJ,
3024                                        /* 49*/ NULL,
3025                                        MatGetRowIJ_SeqBAIJ,
3026                                        MatRestoreRowIJ_SeqBAIJ,
3027                                        MatGetColumnIJ_SeqBAIJ,
3028                                        MatRestoreColumnIJ_SeqBAIJ,
3029                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3030                                        NULL,
3031                                        NULL,
3032                                        NULL,
3033                                        MatSetValuesBlocked_SeqBAIJ,
3034                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3035                                        MatDestroy_SeqBAIJ,
3036                                        MatView_SeqBAIJ,
3037                                        NULL,
3038                                        NULL,
3039                                        /* 64*/ NULL,
3040                                        NULL,
3041                                        NULL,
3042                                        NULL,
3043                                        NULL,
3044                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3045                                        NULL,
3046                                        MatConvert_Basic,
3047                                        NULL,
3048                                        NULL,
3049                                        /* 74*/ NULL,
3050                                        MatFDColoringApply_BAIJ,
3051                                        NULL,
3052                                        NULL,
3053                                        NULL,
3054                                        /* 79*/ NULL,
3055                                        NULL,
3056                                        NULL,
3057                                        NULL,
3058                                        MatLoad_SeqBAIJ,
3059                                        /* 84*/ NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        NULL,
3064                                        /* 89*/ NULL,
3065                                        NULL,
3066                                        NULL,
3067                                        NULL,
3068                                        NULL,
3069                                        /* 94*/ NULL,
3070                                        NULL,
3071                                        NULL,
3072                                        NULL,
3073                                        NULL,
3074                                        /* 99*/ NULL,
3075                                        NULL,
3076                                        NULL,
3077                                        MatConjugate_SeqBAIJ,
3078                                        NULL,
3079                                        /*104*/ NULL,
3080                                        MatRealPart_SeqBAIJ,
3081                                        MatImaginaryPart_SeqBAIJ,
3082                                        NULL,
3083                                        NULL,
3084                                        /*109*/ NULL,
3085                                        NULL,
3086                                        NULL,
3087                                        NULL,
3088                                        MatMissingDiagonal_SeqBAIJ,
3089                                        /*114*/ NULL,
3090                                        NULL,
3091                                        NULL,
3092                                        NULL,
3093                                        NULL,
3094                                        /*119*/ NULL,
3095                                        NULL,
3096                                        MatMultHermitianTranspose_SeqBAIJ,
3097                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3098                                        NULL,
3099                                        /*124*/ NULL,
3100                                        MatGetColumnReductions_SeqBAIJ,
3101                                        MatInvertBlockDiagonal_SeqBAIJ,
3102                                        NULL,
3103                                        NULL,
3104                                        /*129*/ NULL,
3105                                        NULL,
3106                                        NULL,
3107                                        NULL,
3108                                        NULL,
3109                                        /*134*/ NULL,
3110                                        NULL,
3111                                        NULL,
3112                                        NULL,
3113                                        NULL,
3114                                        /*139*/ MatSetBlockSizes_Default,
3115                                        NULL,
3116                                        NULL,
3117                                        MatFDColoringSetUp_SeqXAIJ,
3118                                        NULL,
3119                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3120                                        MatDestroySubMatrices_SeqBAIJ,
3121                                        NULL,
3122                                        NULL,
3123                                        NULL,
3124                                        NULL,
3125                                        /*150*/ NULL,
3126                                        MatEliminateZeros_SeqBAIJ};
3127 
3128 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3129 {
3130   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3131   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3132 
3133   PetscFunctionBegin;
3134   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3135 
3136   /* allocate space for values if not already there */
3137   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3138 
3139   /* copy values over */
3140   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3141   PetscFunctionReturn(PETSC_SUCCESS);
3142 }
3143 
3144 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3145 {
3146   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3147   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3148 
3149   PetscFunctionBegin;
3150   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3151   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3152 
3153   /* copy values over */
3154   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3155   PetscFunctionReturn(PETSC_SUCCESS);
3156 }
3157 
3158 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3160 
3161 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3162 {
3163   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3164   PetscInt     i, mbs, nbs, bs2;
3165   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3166 
3167   PetscFunctionBegin;
3168   if (B->hash_active) {
3169     PetscInt bs;
3170     B->ops[0] = b->cops;
3171     PetscCall(PetscHMapIJVDestroy(&b->ht));
3172     PetscCall(MatGetBlockSize(B, &bs));
3173     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3174     PetscCall(PetscFree(b->dnz));
3175     PetscCall(PetscFree(b->bdnz));
3176     B->hash_active = PETSC_FALSE;
3177   }
3178   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3179   if (nz == MAT_SKIP_ALLOCATION) {
3180     skipallocation = PETSC_TRUE;
3181     nz             = 0;
3182   }
3183 
3184   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3185   PetscCall(PetscLayoutSetUp(B->rmap));
3186   PetscCall(PetscLayoutSetUp(B->cmap));
3187   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3188 
3189   B->preallocated = PETSC_TRUE;
3190 
3191   mbs = B->rmap->n / bs;
3192   nbs = B->cmap->n / bs;
3193   bs2 = bs * bs;
3194 
3195   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3196 
3197   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3198   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3199   if (nnz) {
3200     for (i = 0; i < mbs; i++) {
3201       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3202       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3203     }
3204   }
3205 
3206   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3207   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3208   PetscOptionsEnd();
3209 
3210   if (!flg) {
3211     switch (bs) {
3212     case 1:
3213       B->ops->mult    = MatMult_SeqBAIJ_1;
3214       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3215       break;
3216     case 2:
3217       B->ops->mult    = MatMult_SeqBAIJ_2;
3218       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3219       break;
3220     case 3:
3221       B->ops->mult    = MatMult_SeqBAIJ_3;
3222       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3223       break;
3224     case 4:
3225       B->ops->mult    = MatMult_SeqBAIJ_4;
3226       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3227       break;
3228     case 5:
3229       B->ops->mult    = MatMult_SeqBAIJ_5;
3230       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3231       break;
3232     case 6:
3233       B->ops->mult    = MatMult_SeqBAIJ_6;
3234       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3235       break;
3236     case 7:
3237       B->ops->mult    = MatMult_SeqBAIJ_7;
3238       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3239       break;
3240     case 9: {
3241       PetscInt version = 1;
3242       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3243       switch (version) {
3244 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3245       case 1:
3246         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3247         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3248         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3249         break;
3250 #endif
3251       default:
3252         B->ops->mult    = MatMult_SeqBAIJ_N;
3253         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3254         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3255         break;
3256       }
3257       break;
3258     }
3259     case 11:
3260       B->ops->mult    = MatMult_SeqBAIJ_11;
3261       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3262       break;
3263     case 12: {
3264       PetscInt version = 1;
3265       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3266       switch (version) {
3267       case 1:
3268         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3269         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3270         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3271         break;
3272       case 2:
3273         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3274         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3275         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3276         break;
3277 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3278       case 3:
3279         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3280         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3281         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3282         break;
3283 #endif
3284       default:
3285         B->ops->mult    = MatMult_SeqBAIJ_N;
3286         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3287         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3288         break;
3289       }
3290       break;
3291     }
3292     case 15: {
3293       PetscInt version = 1;
3294       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3295       switch (version) {
3296       case 1:
3297         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3298         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3299         break;
3300       case 2:
3301         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3302         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3303         break;
3304       case 3:
3305         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3306         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3307         break;
3308       case 4:
3309         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3310         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3311         break;
3312       default:
3313         B->ops->mult = MatMult_SeqBAIJ_N;
3314         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3315         break;
3316       }
3317       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3318       break;
3319     }
3320     default:
3321       B->ops->mult    = MatMult_SeqBAIJ_N;
3322       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3323       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3324       break;
3325     }
3326   }
3327   B->ops->sor = MatSOR_SeqBAIJ;
3328   b->mbs      = mbs;
3329   b->nbs      = nbs;
3330   if (!skipallocation) {
3331     if (!b->imax) {
3332       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3333 
3334       b->free_imax_ilen = PETSC_TRUE;
3335     }
3336     /* b->ilen will count nonzeros in each block row so far. */
3337     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3338     if (!nnz) {
3339       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3340       else if (nz < 0) nz = 1;
3341       nz = PetscMin(nz, nbs);
3342       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3343       PetscCall(PetscIntMultError(nz, mbs, &nz));
3344     } else {
3345       PetscInt64 nz64 = 0;
3346       for (i = 0; i < mbs; i++) {
3347         b->imax[i] = nnz[i];
3348         nz64 += nnz[i];
3349       }
3350       PetscCall(PetscIntCast(nz64, &nz));
3351     }
3352 
3353     /* allocate the matrix space */
3354     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3355     if (B->structure_only) {
3356       PetscCall(PetscMalloc1(nz, &b->j));
3357       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3358     } else {
3359       PetscInt nzbs2 = 0;
3360       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3361       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3362       PetscCall(PetscArrayzero(b->a, nz * bs2));
3363     }
3364     PetscCall(PetscArrayzero(b->j, nz));
3365 
3366     if (B->structure_only) {
3367       b->singlemalloc = PETSC_FALSE;
3368       b->free_a       = PETSC_FALSE;
3369     } else {
3370       b->singlemalloc = PETSC_TRUE;
3371       b->free_a       = PETSC_TRUE;
3372     }
3373     b->free_ij = PETSC_TRUE;
3374 
3375     b->i[0] = 0;
3376     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3377 
3378   } else {
3379     b->free_a  = PETSC_FALSE;
3380     b->free_ij = PETSC_FALSE;
3381   }
3382 
3383   b->bs2              = bs2;
3384   b->mbs              = mbs;
3385   b->nz               = 0;
3386   b->maxnz            = nz;
3387   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3388   B->was_assembled    = PETSC_FALSE;
3389   B->assembled        = PETSC_FALSE;
3390   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3391   PetscFunctionReturn(PETSC_SUCCESS);
3392 }
3393 
3394 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3395 {
3396   PetscInt     i, m, nz, nz_max = 0, *nnz;
3397   PetscScalar *values      = NULL;
3398   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3399 
3400   PetscFunctionBegin;
3401   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3402   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3403   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3404   PetscCall(PetscLayoutSetUp(B->rmap));
3405   PetscCall(PetscLayoutSetUp(B->cmap));
3406   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3407   m = B->rmap->n / bs;
3408 
3409   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3410   PetscCall(PetscMalloc1(m + 1, &nnz));
3411   for (i = 0; i < m; i++) {
3412     nz = ii[i + 1] - ii[i];
3413     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3414     nz_max = PetscMax(nz_max, nz);
3415     nnz[i] = nz;
3416   }
3417   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3418   PetscCall(PetscFree(nnz));
3419 
3420   values = (PetscScalar *)V;
3421   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3422   for (i = 0; i < m; i++) {
3423     PetscInt        ncols = ii[i + 1] - ii[i];
3424     const PetscInt *icols = jj + ii[i];
3425     if (bs == 1 || !roworiented) {
3426       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3427       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3428     } else {
3429       PetscInt j;
3430       for (j = 0; j < ncols; j++) {
3431         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3432         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3433       }
3434     }
3435   }
3436   if (!V) PetscCall(PetscFree(values));
3437   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3438   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3439   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3440   PetscFunctionReturn(PETSC_SUCCESS);
3441 }
3442 
3443 /*@C
3444   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3445 
3446   Not Collective
3447 
3448   Input Parameter:
3449 . A - a `MATSEQBAIJ` matrix
3450 
3451   Output Parameter:
3452 . array - pointer to the data
3453 
3454   Level: intermediate
3455 
3456 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3457 @*/
3458 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3459 {
3460   PetscFunctionBegin;
3461   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3462   PetscFunctionReturn(PETSC_SUCCESS);
3463 }
3464 
3465 /*@C
3466   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3467 
3468   Not Collective
3469 
3470   Input Parameters:
3471 + A     - a `MATSEQBAIJ` matrix
3472 - array - pointer to the data
3473 
3474   Level: intermediate
3475 
3476 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3477 @*/
3478 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3479 {
3480   PetscFunctionBegin;
3481   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3482   PetscFunctionReturn(PETSC_SUCCESS);
3483 }
3484 
3485 /*MC
3486    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3487    block sparse compressed row format.
3488 
3489    Options Database Keys:
3490 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3491 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3492 
3493    Level: beginner
3494 
3495    Notes:
3496     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3497     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3498 
3499    Run with `-info` to see what version of the matrix-vector product is being used
3500 
3501 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3502 M*/
3503 
3504 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3505 
3506 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3507 {
3508   PetscMPIInt  size;
3509   Mat_SeqBAIJ *b;
3510 
3511   PetscFunctionBegin;
3512   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3513   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3514 
3515   PetscCall(PetscNew(&b));
3516   B->data   = (void *)b;
3517   B->ops[0] = MatOps_Values;
3518 
3519   b->row          = NULL;
3520   b->col          = NULL;
3521   b->icol         = NULL;
3522   b->reallocs     = 0;
3523   b->saved_values = NULL;
3524 
3525   b->roworiented        = PETSC_TRUE;
3526   b->nonew              = 0;
3527   b->diag               = NULL;
3528   B->spptr              = NULL;
3529   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3530   b->keepnonzeropattern = PETSC_FALSE;
3531 
3532   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3533   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3534   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3535   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3536   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3537   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3538   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3539   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3540   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3541   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3542 #if defined(PETSC_HAVE_HYPRE)
3543   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3544 #endif
3545   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3546   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3547   PetscFunctionReturn(PETSC_SUCCESS);
3548 }
3549 
3550 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3551 {
3552   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3553   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3554 
3555   PetscFunctionBegin;
3556   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3557   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3558 
3559   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3560     c->imax           = a->imax;
3561     c->ilen           = a->ilen;
3562     c->free_imax_ilen = PETSC_FALSE;
3563   } else {
3564     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3565     for (i = 0; i < mbs; i++) {
3566       c->imax[i] = a->imax[i];
3567       c->ilen[i] = a->ilen[i];
3568     }
3569     c->free_imax_ilen = PETSC_TRUE;
3570   }
3571 
3572   /* allocate the matrix space */
3573   if (mallocmatspace) {
3574     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3575       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3576 
3577       c->i            = a->i;
3578       c->j            = a->j;
3579       c->singlemalloc = PETSC_FALSE;
3580       c->free_a       = PETSC_TRUE;
3581       c->free_ij      = PETSC_FALSE;
3582       c->parent       = A;
3583       C->preallocated = PETSC_TRUE;
3584       C->assembled    = PETSC_TRUE;
3585 
3586       PetscCall(PetscObjectReference((PetscObject)A));
3587       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3588       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3589     } else {
3590       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3591 
3592       c->singlemalloc = PETSC_TRUE;
3593       c->free_a       = PETSC_TRUE;
3594       c->free_ij      = PETSC_TRUE;
3595 
3596       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3597       if (mbs > 0) {
3598         PetscCall(PetscArraycpy(c->j, a->j, nz));
3599         if (cpvalues == MAT_COPY_VALUES) {
3600           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3601         } else {
3602           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3603         }
3604       }
3605       C->preallocated = PETSC_TRUE;
3606       C->assembled    = PETSC_TRUE;
3607     }
3608   }
3609 
3610   c->roworiented = a->roworiented;
3611   c->nonew       = a->nonew;
3612 
3613   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3614   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3615 
3616   c->bs2 = a->bs2;
3617   c->mbs = a->mbs;
3618   c->nbs = a->nbs;
3619 
3620   if (a->diag) {
3621     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3622       c->diag      = a->diag;
3623       c->free_diag = PETSC_FALSE;
3624     } else {
3625       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3626       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3627       c->free_diag = PETSC_TRUE;
3628     }
3629   } else c->diag = NULL;
3630 
3631   c->nz         = a->nz;
3632   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3633   c->solve_work = NULL;
3634   c->mult_work  = NULL;
3635   c->sor_workt  = NULL;
3636   c->sor_work   = NULL;
3637 
3638   c->compressedrow.use   = a->compressedrow.use;
3639   c->compressedrow.nrows = a->compressedrow.nrows;
3640   if (a->compressedrow.use) {
3641     i = a->compressedrow.nrows;
3642     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3643     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3644     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3645   } else {
3646     c->compressedrow.use    = PETSC_FALSE;
3647     c->compressedrow.i      = NULL;
3648     c->compressedrow.rindex = NULL;
3649   }
3650   c->nonzerorowcnt = a->nonzerorowcnt;
3651   C->nonzerostate  = A->nonzerostate;
3652 
3653   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3654   PetscFunctionReturn(PETSC_SUCCESS);
3655 }
3656 
3657 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3658 {
3659   PetscFunctionBegin;
3660   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3661   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3662   PetscCall(MatSetType(*B, MATSEQBAIJ));
3663   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3664   PetscFunctionReturn(PETSC_SUCCESS);
3665 }
3666 
3667 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3668 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3669 {
3670   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3671   PetscInt    *rowidxs, *colidxs;
3672   PetscScalar *matvals;
3673 
3674   PetscFunctionBegin;
3675   PetscCall(PetscViewerSetUp(viewer));
3676 
3677   /* read matrix header */
3678   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3679   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3680   M  = header[1];
3681   N  = header[2];
3682   nz = header[3];
3683   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3684   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3685   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3686 
3687   /* set block sizes from the viewer's .info file */
3688   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3689   /* set local and global sizes if not set already */
3690   if (mat->rmap->n < 0) mat->rmap->n = M;
3691   if (mat->cmap->n < 0) mat->cmap->n = N;
3692   if (mat->rmap->N < 0) mat->rmap->N = M;
3693   if (mat->cmap->N < 0) mat->cmap->N = N;
3694   PetscCall(PetscLayoutSetUp(mat->rmap));
3695   PetscCall(PetscLayoutSetUp(mat->cmap));
3696 
3697   /* check if the matrix sizes are correct */
3698   PetscCall(MatGetSize(mat, &rows, &cols));
3699   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3700   PetscCall(MatGetBlockSize(mat, &bs));
3701   PetscCall(MatGetLocalSize(mat, &m, &n));
3702   mbs = m / bs;
3703   nbs = n / bs;
3704 
3705   /* read in row lengths, column indices and nonzero values */
3706   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3707   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3708   rowidxs[0] = 0;
3709   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3710   sum = rowidxs[m];
3711   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3712 
3713   /* read in column indices and nonzero values */
3714   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3715   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3716   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3717 
3718   {               /* preallocate matrix storage */
3719     PetscBT   bt; /* helper bit set to count nonzeros */
3720     PetscInt *nnz;
3721     PetscBool sbaij;
3722 
3723     PetscCall(PetscBTCreate(nbs, &bt));
3724     PetscCall(PetscCalloc1(mbs, &nnz));
3725     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3726     for (i = 0; i < mbs; i++) {
3727       PetscCall(PetscBTMemzero(nbs, bt));
3728       for (k = 0; k < bs; k++) {
3729         PetscInt row = bs * i + k;
3730         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3731           PetscInt col = colidxs[j];
3732           if (!sbaij || col >= row)
3733             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3734         }
3735       }
3736     }
3737     PetscCall(PetscBTDestroy(&bt));
3738     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3739     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3740     PetscCall(PetscFree(nnz));
3741   }
3742 
3743   /* store matrix values */
3744   for (i = 0; i < m; i++) {
3745     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3746     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3747   }
3748 
3749   PetscCall(PetscFree(rowidxs));
3750   PetscCall(PetscFree2(colidxs, matvals));
3751   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3752   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3753   PetscFunctionReturn(PETSC_SUCCESS);
3754 }
3755 
3756 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3757 {
3758   PetscBool isbinary;
3759 
3760   PetscFunctionBegin;
3761   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3762   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3763   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3764   PetscFunctionReturn(PETSC_SUCCESS);
3765 }
3766 
3767 /*@C
3768   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3769   compressed row) format.  For good matrix assembly performance the
3770   user should preallocate the matrix storage by setting the parameter `nz`
3771   (or the array `nnz`).
3772 
3773   Collective
3774 
3775   Input Parameters:
3776 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3777 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3778           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3779 . m    - number of rows
3780 . n    - number of columns
3781 . nz   - number of nonzero blocks  per block row (same for all rows)
3782 - nnz  - array containing the number of nonzero blocks in the various block rows
3783          (possibly different for each block row) or `NULL`
3784 
3785   Output Parameter:
3786 . A - the matrix
3787 
3788   Options Database Keys:
3789 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3790 - -mat_block_size - size of the blocks to use
3791 
3792   Level: intermediate
3793 
3794   Notes:
3795   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3796   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3797   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3798 
3799   The number of rows and columns must be divisible by blocksize.
3800 
3801   If the `nnz` parameter is given then the `nz` parameter is ignored
3802 
3803   A nonzero block is any block that as 1 or more nonzeros in it
3804 
3805   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3806   storage.  That is, the stored row and column indices can begin at
3807   either one (as in Fortran) or zero.
3808 
3809   Specify the preallocated storage with either `nz` or `nnz` (not both).
3810   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3811   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3812   matrices.
3813 
3814 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3815 @*/
3816 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3817 {
3818   PetscFunctionBegin;
3819   PetscCall(MatCreate(comm, A));
3820   PetscCall(MatSetSizes(*A, m, n, m, n));
3821   PetscCall(MatSetType(*A, MATSEQBAIJ));
3822   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3823   PetscFunctionReturn(PETSC_SUCCESS);
3824 }
3825 
3826 /*@C
3827   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3828   per row in the matrix. For good matrix assembly performance the
3829   user should preallocate the matrix storage by setting the parameter `nz`
3830   (or the array `nnz`).
3831 
3832   Collective
3833 
3834   Input Parameters:
3835 + B   - the matrix
3836 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3837           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3838 . nz  - number of block nonzeros per block row (same for all rows)
3839 - nnz - array containing the number of block nonzeros in the various block rows
3840          (possibly different for each block row) or `NULL`
3841 
3842   Options Database Keys:
3843 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3844 - -mat_block_size - size of the blocks to use
3845 
3846   Level: intermediate
3847 
3848   Notes:
3849   If the `nnz` parameter is given then the `nz` parameter is ignored
3850 
3851   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3852   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3853   You can also run with the option `-info` and look for messages with the string
3854   malloc in them to see if additional memory allocation was needed.
3855 
3856   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3857   storage.  That is, the stored row and column indices can begin at
3858   either one (as in Fortran) or zero.
3859 
3860   Specify the preallocated storage with either nz or nnz (not both).
3861   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3862   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3863 
3864 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3865 @*/
3866 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3867 {
3868   PetscFunctionBegin;
3869   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3870   PetscValidType(B, 1);
3871   PetscValidLogicalCollectiveInt(B, bs, 2);
3872   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3873   PetscFunctionReturn(PETSC_SUCCESS);
3874 }
3875 
3876 /*@C
3877   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3878 
3879   Collective
3880 
3881   Input Parameters:
3882 + B  - the matrix
3883 . bs - the blocksize
3884 . i  - the indices into `j` for the start of each local row (starts with zero)
3885 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
3886 - v  - optional values in the matrix
3887 
3888   Level: advanced
3889 
3890   Notes:
3891   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3892   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3893   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3894   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3895   block column and the second index is over columns within a block.
3896 
3897   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3898 
3899 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3900 @*/
3901 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3902 {
3903   PetscFunctionBegin;
3904   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3905   PetscValidType(B, 1);
3906   PetscValidLogicalCollectiveInt(B, bs, 2);
3907   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3908   PetscFunctionReturn(PETSC_SUCCESS);
3909 }
3910 
3911 /*@
3912   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3913 
3914   Collective
3915 
3916   Input Parameters:
3917 + comm - must be an MPI communicator of size 1
3918 . bs   - size of block
3919 . m    - number of rows
3920 . n    - number of columns
3921 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3922 . j    - column indices
3923 - a    - matrix values
3924 
3925   Output Parameter:
3926 . mat - the matrix
3927 
3928   Level: advanced
3929 
3930   Notes:
3931   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3932   once the matrix is destroyed
3933 
3934   You cannot set new nonzero locations into this matrix, that will generate an error.
3935 
3936   The `i` and `j` indices are 0 based
3937 
3938   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3939 
3940   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3941   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3942   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3943   with column-major ordering within blocks.
3944 
3945 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3946 @*/
3947 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3948 {
3949   Mat_SeqBAIJ *baij;
3950 
3951   PetscFunctionBegin;
3952   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3953   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3954 
3955   PetscCall(MatCreate(comm, mat));
3956   PetscCall(MatSetSizes(*mat, m, n, m, n));
3957   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3958   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3959   baij = (Mat_SeqBAIJ *)(*mat)->data;
3960   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3961 
3962   baij->i = i;
3963   baij->j = j;
3964   baij->a = a;
3965 
3966   baij->singlemalloc   = PETSC_FALSE;
3967   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3968   baij->free_a         = PETSC_FALSE;
3969   baij->free_ij        = PETSC_FALSE;
3970   baij->free_imax_ilen = PETSC_TRUE;
3971 
3972   for (PetscInt ii = 0; ii < m; ii++) {
3973     const PetscInt row_len = i[ii + 1] - i[ii];
3974 
3975     baij->ilen[ii] = baij->imax[ii] = row_len;
3976     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3977   }
3978   if (PetscDefined(USE_DEBUG)) {
3979     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3980       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3981       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3982     }
3983   }
3984 
3985   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3986   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3987   PetscFunctionReturn(PETSC_SUCCESS);
3988 }
3989 
3990 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3991 {
3992   PetscFunctionBegin;
3993   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3994   PetscFunctionReturn(PETSC_SUCCESS);
3995 }
3996