xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 5d7a6ebe9dde080aedbe86be0085708de8f97bb7)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
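/*
   A minimal user-level sketch of the MATPREALLOCATOR idiom used above (the sizes m,n,M,N, the
   insertion loop, and the arrays row/cols/vals are placeholders, not part of this file):

     Mat preall;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &preall));
     PetscCall(MatSetType(preall, MATPREALLOCATOR));
     PetscCall(MatSetSizes(preall, m, n, M, N));
     PetscCall(MatSetUp(preall));
     // first pass: insert the same nonzero pattern that will later go into the real matrix A
     PetscCall(MatSetValues(preall, 1, &row, ncols, cols, vals, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(preall, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(preall, MAT_FINAL_ASSEMBLY));
     // second pass: preallocate A from the recorded pattern (fill == PETSC_TRUE also inserts zeros)
     PetscCall(MatPreallocatorPreallocate(preall, PETSC_TRUE, A));
     PetscCall(MatDestroy(&preall));
*/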
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
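/*
   Usage sketch (not part of this file): the basic fallback above can back a plain MatConvert()
   call on an MPISBAIJ matrix when no specialized conversion routine exists, e.g.

     Mat B;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));  // expand symmetric storage to full AIJ
     ...
     PetscCall(MatDestroy(&B));

   MAT_INPLACE_MATRIX would instead replace A itself, as handled by MatHeaderReplace() above.
*/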
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237     A->nonzerostate++; \
238   a_noinsert:; \
239     ailen[brow] = nrow; \
240   } while (0)
241 
242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
243   do { \
244     brow = row / bs; \
245     rp   = bj + bi[brow]; \
246     ap   = ba + bs2 * bi[brow]; \
247     rmax = bimax[brow]; \
248     nrow = bilen[brow]; \
249     bcol = col / bs; \
250     ridx = row % bs; \
251     cidx = col % bs; \
252     low  = 0; \
253     high = nrow; \
254     while (high - low > 3) { \
255       t = (low + high) / 2; \
256       if (rp[t] > bcol) high = t; \
257       else low = t; \
258     } \
259     for (_i = low; _i < high; _i++) { \
260       if (rp[_i] > bcol) break; \
261       if (rp[_i] == bcol) { \
262         bap = ap + bs2 * _i + bs * cidx + ridx; \
263         if (addv == ADD_VALUES) *bap += value; \
264         else *bap = value; \
265         goto b_noinsert; \
266       } \
267     } \
268     if (b->nonew == 1) goto b_noinsert; \
269     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
270     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
271     N = nrow++ - 1; \
272     /* shift up all the later entries in this row */ \
273     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
274     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
275     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
276     rp[_i]                          = bcol; \
277     ap[bs2 * _i + bs * cidx + ridx] = value; \
278     B->nonzerostate++; \
279   b_noinsert:; \
280     bilen[brow] = nrow; \
281   } while (0)
282 
283 /* Only add/insert a(i,j) with i<=j (blocks).
284    Any a(i,j) with i>j input by the user is either ignored or generates an error
285 */
286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
287 {
288   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
289   MatScalar     value;
290   PetscBool     roworiented = baij->roworiented;
291   PetscInt      i, j, row, col;
292   PetscInt      rstart_orig = mat->rmap->rstart;
293   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
294   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
295 
296   /* Some variables required by the macros below */
297   Mat           A     = baij->A;
298   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
299   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
300   MatScalar    *aa = a->a;
301 
302   Mat          B     = baij->B;
303   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
304   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
305   MatScalar   *ba = b->a;
306 
307   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
308   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
309   MatScalar *ap, *bap;
310 
311   /* for stash */
312   PetscInt   n_loc, *in_loc = NULL;
313   MatScalar *v_loc = NULL;
314 
315   PetscFunctionBegin;
316   if (!baij->donotstash) {
317     if (n > baij->n_loc) {
318       PetscCall(PetscFree(baij->in_loc));
319       PetscCall(PetscFree(baij->v_loc));
320       PetscCall(PetscMalloc1(n, &baij->in_loc));
321       PetscCall(PetscMalloc1(n, &baij->v_loc));
322 
323       baij->n_loc = n;
324     }
325     in_loc = baij->in_loc;
326     v_loc  = baij->v_loc;
327   }
328 
329   for (i = 0; i < m; i++) {
330     if (im[i] < 0) continue;
331     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
332     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
333       row = im[i] - rstart_orig;                     /* local row index */
334       for (j = 0; j < n; j++) {
335         if (im[i] / bs > in[j] / bs) {
336           if (a->ignore_ltriangular) {
337             continue; /* ignore lower triangular blocks */
338         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
339         }
340         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
341           col  = in[j] - cstart_orig;                    /* local col index */
342           brow = row / bs;
343           bcol = col / bs;
344           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
345           if (roworiented) value = v[i * n + j];
346           else value = v[i + j * m];
347           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
348           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
349         } else if (in[j] < 0) {
350           continue;
351         } else {
352           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
353           /* off-diag entry (B) */
354           if (mat->was_assembled) {
355             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
356 #if defined(PETSC_USE_CTABLE)
357             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
358             col = col - 1;
359 #else
360             col = baij->colmap[in[j] / bs] - 1;
361 #endif
362             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
363               PetscCall(MatDisAssemble_MPISBAIJ(mat));
364               col = in[j];
365               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
366               B     = baij->B;
367               b     = (Mat_SeqBAIJ *)(B)->data;
368               bimax = b->imax;
369               bi    = b->i;
370               bilen = b->ilen;
371               bj    = b->j;
372               ba    = b->a;
373             } else col += in[j] % bs;
374           } else col = in[j];
375           if (roworiented) value = v[i * n + j];
376           else value = v[i + j * m];
377           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
378           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
379         }
380       }
381     } else { /* off processor entry */
382       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
383       if (!baij->donotstash) {
384         mat->assembled = PETSC_FALSE;
385         n_loc          = 0;
386         for (j = 0; j < n; j++) {
387           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
388           in_loc[n_loc] = in[j];
389           if (roworiented) {
390             v_loc[n_loc] = v[i * n + j];
391           } else {
392             v_loc[n_loc] = v[j * m + i];
393           }
394           n_loc++;
395         }
396         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
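/*
   Usage sketch (not part of this file): only entries whose block row is <= the block column
   should be passed in; lower-triangular entries either trigger the error above or, with the
   option set, are silently dropped:

     PetscCall(MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
     PetscCall(MatSetValue(A, i, j, v, ADD_VALUES));   // kept only if i/bs <= j/bs
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/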
402 
403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
404 {
405   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
406   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
407   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
408   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
409   PetscBool          roworiented = a->roworiented;
410   const PetscScalar *value       = v;
411   MatScalar         *ap, *aa = a->a, *bap;
412 
413   PetscFunctionBegin;
414   if (col < row) {
415     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
416     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
417   }
418   rp    = aj + ai[row];
419   ap    = aa + bs2 * ai[row];
420   rmax  = imax[row];
421   nrow  = ailen[row];
422   value = v;
423   low   = 0;
424   high  = nrow;
425 
426   while (high - low > 7) {
427     t = (low + high) / 2;
428     if (rp[t] > col) high = t;
429     else low = t;
430   }
431   for (i = low; i < high; i++) {
432     if (rp[i] > col) break;
433     if (rp[i] == col) {
434       bap = ap + bs2 * i;
435       if (roworiented) {
436         if (is == ADD_VALUES) {
437           for (ii = 0; ii < bs; ii++) {
438             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
439           }
440         } else {
441           for (ii = 0; ii < bs; ii++) {
442             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
443           }
444         }
445       } else {
446         if (is == ADD_VALUES) {
447           for (ii = 0; ii < bs; ii++) {
448             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
449           }
450         } else {
451           for (ii = 0; ii < bs; ii++) {
452             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
453           }
454         }
455       }
456       goto noinsert2;
457     }
458   }
459   if (nonew == 1) goto noinsert2;
460   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero block at block index (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
461   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
462   N = nrow++ - 1;
463   high++;
464   /* shift up all the later entries in this row */
465   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
466   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
467   rp[i] = col;
468   bap   = ap + bs2 * i;
469   if (roworiented) {
470     for (ii = 0; ii < bs; ii++) {
471       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
472     }
473   } else {
474     for (ii = 0; ii < bs; ii++) {
475       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
476     }
477   }
478 noinsert2:;
479   ailen[row] = nrow;
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 
483 /*
484    This routine is exactly duplicated in mpibaij.c
485 */
486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
487 {
488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
489   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
490   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
491   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
492   PetscBool          roworiented = a->roworiented;
493   const PetscScalar *value       = v;
494   MatScalar         *ap, *aa = a->a, *bap;
495 
496   PetscFunctionBegin;
497   rp    = aj + ai[row];
498   ap    = aa + bs2 * ai[row];
499   rmax  = imax[row];
500   nrow  = ailen[row];
501   low   = 0;
502   high  = nrow;
503   value = v;
504   while (high - low > 7) {
505     t = (low + high) / 2;
506     if (rp[t] > col) high = t;
507     else low = t;
508   }
509   for (i = low; i < high; i++) {
510     if (rp[i] > col) break;
511     if (rp[i] == col) {
512       bap = ap + bs2 * i;
513       if (roworiented) {
514         if (is == ADD_VALUES) {
515           for (ii = 0; ii < bs; ii++) {
516             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
517           }
518         } else {
519           for (ii = 0; ii < bs; ii++) {
520             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
521           }
522         }
523       } else {
524         if (is == ADD_VALUES) {
525           for (ii = 0; ii < bs; ii++, value += bs) {
526             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
527             bap += bs;
528           }
529         } else {
530           for (ii = 0; ii < bs; ii++, value += bs) {
531             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
532             bap += bs;
533           }
534         }
535       }
536       goto noinsert2;
537     }
538   }
539   if (nonew == 1) goto noinsert2;
540   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero block at global block index (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
541   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
542   N = nrow++ - 1;
543   high++;
544   /* shift up all the later entries in this row */
545   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
546   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
547   rp[i] = col;
548   bap   = ap + bs2 * i;
549   if (roworiented) {
550     for (ii = 0; ii < bs; ii++) {
551       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
552     }
553   } else {
554     for (ii = 0; ii < bs; ii++) {
555       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
556     }
557   }
558 noinsert2:;
559   ailen[row] = nrow;
560   PetscFunctionReturn(PETSC_SUCCESS);
561 }
562 
563 /*
564     This routine could be optimized by removing the need for the block copy below and passing stride information
565   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
566 */
567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
568 {
569   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
570   const MatScalar *value;
571   MatScalar       *barray      = baij->barray;
572   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
573   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
574   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
575   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
576 
577   PetscFunctionBegin;
578   if (!barray) {
579     PetscCall(PetscMalloc1(bs2, &barray));
580     baij->barray = barray;
581   }
582 
583   if (roworiented) {
584     stepval = (n - 1) * bs;
585   } else {
586     stepval = (m - 1) * bs;
587   }
588   for (i = 0; i < m; i++) {
589     if (im[i] < 0) continue;
590     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
591     if (im[i] >= rstart && im[i] < rend) {
592       row = im[i] - rstart;
593       for (j = 0; j < n; j++) {
594         if (im[i] > in[j]) {
595           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
596           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
597         }
598         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, a copy is not required */
599         if ((roworiented) && (n == 1)) {
600           barray = (MatScalar *)v + i * bs2;
601         } else if ((!roworiented) && (m == 1)) {
602           barray = (MatScalar *)v + j * bs2;
603         } else { /* Here a copy is required */
604           if (roworiented) {
605             value = v + i * (stepval + bs) * bs + j * bs;
606           } else {
607             value = v + j * (stepval + bs) * bs + i * bs;
608           }
609           for (ii = 0; ii < bs; ii++, value += stepval) {
610             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
611           }
612           barray -= bs2;
613         }
614 
615         if (in[j] >= cstart && in[j] < cend) {
616           col = in[j] - cstart;
617           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
618         } else if (in[j] < 0) {
619           continue;
620         } else {
621           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
622           if (mat->was_assembled) {
623             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624 
625 #if defined(PETSC_USE_DEBUG)
626   #if defined(PETSC_USE_CTABLE)
627             {
628               PetscInt data;
629               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
630               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
631             }
632   #else
633             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
634   #endif
635 #endif
636 #if defined(PETSC_USE_CTABLE)
637             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
638             col = (col - 1) / bs;
639 #else
640             col = (baij->colmap[in[j]] - 1) / bs;
641 #endif
642             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
643               PetscCall(MatDisAssemble_MPISBAIJ(mat));
644               col = in[j];
645             }
646           } else col = in[j];
647           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
648         }
649       }
650     } else {
651       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
652       if (!baij->donotstash) {
653         if (roworiented) {
654           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
655         } else {
656           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         }
658       }
659     }
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
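/*
   Usage sketch (not part of this file): inserting one bs x bs block at global block indices
   (ib, jb) with ib <= jb; by default the block values are interpreted row-oriented:

     const PetscScalar vals[4] = {a00, a01, a10, a11};   // bs = 2, row-major block
     PetscCall(MatSetValuesBlocked(A, 1, &ib, 1, &jb, vals, INSERT_VALUES));

   MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE) switches to column-oriented input, which is
   what the block stash uses during assembly (see MatAssemblyEnd_MPISBAIJ below).
*/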
663 
664 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
665 {
666   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
667   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
668   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
669 
670   PetscFunctionBegin;
671   for (i = 0; i < m; i++) {
672     if (idxm[i] < 0) continue; /* negative row */
673     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
674     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
675       row = idxm[i] - bsrstart;
676       for (j = 0; j < n; j++) {
677         if (idxn[j] < 0) continue; /* negative column */
678         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
679         if (idxn[j] >= bscstart && idxn[j] < bscend) {
680           col = idxn[j] - bscstart;
681           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
682         } else {
683           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
684 #if defined(PETSC_USE_CTABLE)
685           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
686           data--;
687 #else
688           data = baij->colmap[idxn[j] / bs] - 1;
689 #endif
690           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
691           else {
692             col = data + idxn[j] % bs;
693             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
694           }
695         }
696       }
697     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
698   }
699   PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 
702 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
703 {
704   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
705   PetscReal     sum[2], *lnorm2;
706 
707   PetscFunctionBegin;
708   if (baij->size == 1) {
709     PetscCall(MatNorm(baij->A, type, norm));
710   } else {
711     if (type == NORM_FROBENIUS) {
712       PetscCall(PetscMalloc1(2, &lnorm2));
713       PetscCall(MatNorm(baij->A, type, lnorm2));
714       *lnorm2 = (*lnorm2) * (*lnorm2);
715       lnorm2++; /* square of norm(A) */
716       PetscCall(MatNorm(baij->B, type, lnorm2));
717       *lnorm2 = (*lnorm2) * (*lnorm2);
718       lnorm2--; /* square of norm(B) */
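      /* B stores each off-diagonal block only once; by symmetry it contributes twice to ||A||_F^2, hence the factor 2 below */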
719       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
720       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
721       PetscCall(PetscFree(lnorm2));
722     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
723       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
724       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
725       PetscReal    *rsum, *rsum2, vabs;
726       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
727       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
728       MatScalar    *v;
729 
730       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
731       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
732       /* Amat */
733       v  = amat->a;
734       jj = amat->j;
735       for (brow = 0; brow < mbs; brow++) {
736         grow = bs * (rstart + brow);
737         nz   = amat->i[brow + 1] - amat->i[brow];
738         for (bcol = 0; bcol < nz; bcol++) {
739           gcol = bs * (rstart + *jj);
740           jj++;
741           for (col = 0; col < bs; col++) {
742             for (row = 0; row < bs; row++) {
743               vabs = PetscAbsScalar(*v);
744               v++;
745               rsum[gcol + col] += vabs;
746               /* non-diagonal block */
747               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
748             }
749           }
750         }
751         PetscCall(PetscLogFlops(nz * bs * bs));
752       }
753       /* Bmat */
754       v  = bmat->a;
755       jj = bmat->j;
756       for (brow = 0; brow < mbs; brow++) {
757         grow = bs * (rstart + brow);
758         nz   = bmat->i[brow + 1] - bmat->i[brow];
759         for (bcol = 0; bcol < nz; bcol++) {
760           gcol = bs * garray[*jj];
761           jj++;
762           for (col = 0; col < bs; col++) {
763             for (row = 0; row < bs; row++) {
764               vabs = PetscAbsScalar(*v);
765               v++;
766               rsum[gcol + col] += vabs;
767               rsum[grow + row] += vabs;
768             }
769           }
770         }
771         PetscCall(PetscLogFlops(nz * bs * bs));
772       }
773       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
774       *norm = 0.0;
775       for (col = 0; col < mat->cmap->N; col++) {
776         if (rsum2[col] > *norm) *norm = rsum2[col];
777       }
778       PetscCall(PetscFree2(rsum, rsum2));
779     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
780   }
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
785 {
786   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
787   PetscInt      nstash, reallocs;
788 
789   PetscFunctionBegin;
790   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
791 
792   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
793   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
794   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
795   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
796   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
802 {
803   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
804   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
805   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
806   PetscInt     *row, *col;
807   PetscBool     other_disassembled;
808   PetscMPIInt   n;
809   PetscBool     r1, r2, r3;
810   MatScalar    *val;
811 
812   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
813   PetscFunctionBegin;
814   if (!baij->donotstash && !mat->nooffprocentries) {
815     while (1) {
816       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
817       if (!flg) break;
818 
819       for (i = 0; i < n;) {
820         /* Now identify the consecutive vals belonging to the same row */
821         for (j = i, rstart = row[j]; j < n; j++) {
822           if (row[j] != rstart) break;
823         }
824         if (j < n) ncols = j - i;
825         else ncols = n - i;
826         /* Now assemble all these values with a single function call */
827         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
828         i = j;
829       }
830     }
831     PetscCall(MatStashScatterEnd_Private(&mat->stash));
832     /* Now process the block-stash. Since the values are stashed column-oriented,
833        temporarily clear the roworiented flags (so the values are read column-oriented)
834        and restore the original flags after MatSetValuesBlocked() */
835     r1 = baij->roworiented;
836     r2 = a->roworiented;
837     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
838 
839     baij->roworiented = PETSC_FALSE;
840     a->roworiented    = PETSC_FALSE;
841 
842     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
843     while (1) {
844       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
845       if (!flg) break;
846 
847       for (i = 0; i < n;) {
848         /* Now identify the consecutive vals belonging to the same row */
849         for (j = i, rstart = row[j]; j < n; j++) {
850           if (row[j] != rstart) break;
851         }
852         if (j < n) ncols = j - i;
853         else ncols = n - i;
854         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
855         i = j;
856       }
857     }
858     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
859 
860     baij->roworiented = r1;
861     a->roworiented    = r2;
862 
863     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
864   }
865 
866   PetscCall(MatAssemblyBegin(baij->A, mode));
867   PetscCall(MatAssemblyEnd(baij->A, mode));
868 
869   /* determine if any processor has disassembled; if so, we must also
870      disassemble ourselves, so that we may reassemble. */
871   /*
872      if the nonzero structure of the submatrix B cannot change, then we know that
873      no processor disassembled, thus we can skip this step
874   */
875   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
876     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
877     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
878   }
879 
880   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
881   PetscCall(MatAssemblyBegin(baij->B, mode));
882   PetscCall(MatAssemblyEnd(baij->B, mode));
883 
884   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
885 
886   baij->rowvalues = NULL;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
890     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
891     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
892   }
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
896 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
897 #include <petscdraw.h>
898 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
899 {
900   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
901   PetscInt          bs   = mat->rmap->bs;
902   PetscMPIInt       rank = baij->rank;
903   PetscBool         iascii, isdraw;
904   PetscViewer       sviewer;
905   PetscViewerFormat format;
906 
907   PetscFunctionBegin;
908   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
909   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
910   if (iascii) {
911     PetscCall(PetscViewerGetFormat(viewer, &format));
912     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
913       MatInfo info;
914       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
915       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
916       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
917       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
918                                                    mat->rmap->bs, (double)info.memory));
919       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
920       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
921       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(PetscViewerFlush(viewer));
924       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
925       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
926       PetscCall(VecScatterView(baij->Mvctx, viewer));
927       PetscFunctionReturn(PETSC_SUCCESS);
928     } else if (format == PETSC_VIEWER_ASCII_INFO) {
929       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
930       PetscFunctionReturn(PETSC_SUCCESS);
931     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
932       PetscFunctionReturn(PETSC_SUCCESS);
933     }
934   }
935 
936   if (isdraw) {
937     PetscDraw draw;
938     PetscBool isnull;
939     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
940     PetscCall(PetscDrawIsNull(draw, &isnull));
941     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
942   }
943 
944   {
945     /* assemble the entire matrix onto the first processor. */
946     Mat           A;
947     Mat_SeqSBAIJ *Aloc;
948     Mat_SeqBAIJ  *Bloc;
949     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
950     MatScalar    *a;
951     const char   *matname;
952 
953     /* Should this be the same type as mat? */
954     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
955     if (rank == 0) {
956       PetscCall(MatSetSizes(A, M, N, M, N));
957     } else {
958       PetscCall(MatSetSizes(A, 0, 0, M, N));
959     }
960     PetscCall(MatSetType(A, MATMPISBAIJ));
961     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
962     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
963 
964     /* copy over the A part */
965     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
966     ai   = Aloc->i;
967     aj   = Aloc->j;
968     a    = Aloc->a;
969     PetscCall(PetscMalloc1(bs, &rvals));
970 
971     for (i = 0; i < mbs; i++) {
972       rvals[0] = bs * (baij->rstartbs + i);
973       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
974       for (j = ai[i]; j < ai[i + 1]; j++) {
975         col = (baij->cstartbs + aj[j]) * bs;
976         for (k = 0; k < bs; k++) {
977           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
978           col++;
979           a += bs;
980         }
981       }
982     }
983     /* copy over the B part */
984     Bloc = (Mat_SeqBAIJ *)baij->B->data;
985     ai   = Bloc->i;
986     aj   = Bloc->j;
987     a    = Bloc->a;
988     for (i = 0; i < mbs; i++) {
989       rvals[0] = bs * (baij->rstartbs + i);
990       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
991       for (j = ai[i]; j < ai[i + 1]; j++) {
992         col = baij->garray[aj[j]] * bs;
993         for (k = 0; k < bs; k++) {
994           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
995           col++;
996           a += bs;
997         }
998       }
999     }
1000     PetscCall(PetscFree(rvals));
1001     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1002     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1003     /*
1004        Everyone has to participate in the call to draw the matrix since the graphics waits are
1005        synchronized across all processes that share the PetscDraw object
1006     */
1007     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1008     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1009     if (rank == 0) {
1010       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1011       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1012     }
1013     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1014     PetscCall(PetscViewerFlush(viewer));
1015     PetscCall(MatDestroy(&A));
1016   }
1017   PetscFunctionReturn(PETSC_SUCCESS);
1018 }
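/*
   Usage sketch (not part of this file): the INFO_DETAIL branch above can be reached with

     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL));
     PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));

   or from the command line with -mat_view ::ascii_info_detail.
*/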
1019 
1020 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1021 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1022 
1023 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1024 {
1025   PetscBool iascii, isdraw, issocket, isbinary;
1026 
1027   PetscFunctionBegin;
1028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1032   if (iascii || isdraw || issocket) {
1033     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1034   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1035   PetscFunctionReturn(PETSC_SUCCESS);
1036 }
1037 
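/*
   Sketch of the symmetric multiply y = A*x when only the (block) upper triangle is stored:
     slvec1a = A_diag * x_local   (a->A is SeqSBAIJ, so the diagonal block is applied symmetrically)
     slvec0b = B^T * x_local      (lower-triangle contribution of the off-diagonal part)
     slvec0  = [x_local ; slvec0b] is scattered with ADD_VALUES through sMvctx, which sends the
               B^T contributions to their owning processes and gathers the ghost values of x into slvec1b
     y       = B * slvec1b + slvec1a
   The Hermitian variant uses B^H in place of B^T.
*/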
1038 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1039 {
1040   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1041   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1042   PetscScalar       *from;
1043   const PetscScalar *x;
1044 
1045   PetscFunctionBegin;
1046   /* diagonal part */
1047   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1048   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1049   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1050   PetscCall(VecZeroEntries(a->slvec1b));
1051 
1052   /* subdiagonal part */
1053   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055 
1056   /* copy x into the vec slvec0 */
1057   PetscCall(VecGetArray(a->slvec0, &from));
1058   PetscCall(VecGetArrayRead(xx, &x));
1059 
1060   PetscCall(PetscArraycpy(from, x, bs * mbs));
1061   PetscCall(VecRestoreArray(a->slvec0, &from));
1062   PetscCall(VecRestoreArrayRead(xx, &x));
1063 
1064   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066   /* superdiagonal part */
1067   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075   PetscScalar       *from;
1076   const PetscScalar *x;
1077 
1078   PetscFunctionBegin;
1079   /* diagonal part */
1080   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1082   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1083   PetscCall(VecZeroEntries(a->slvec1b));
1084 
1085   /* subdiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1087 
1088   /* copy x into the vec slvec0 */
1089   PetscCall(VecGetArray(a->slvec0, &from));
1090   PetscCall(VecGetArrayRead(xx, &x));
1091 
1092   PetscCall(PetscArraycpy(from, x, bs * mbs));
1093   PetscCall(VecRestoreArray(a->slvec0, &from));
1094   PetscCall(VecRestoreArrayRead(xx, &x));
1095 
1096   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1097   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1098   /* superdiagonal part */
1099   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1100   PetscFunctionReturn(PETSC_SUCCESS);
1101 }
1102 
1103 #if PetscDefined(USE_COMPLEX)
1104 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1105 {
1106   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1107   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1108   PetscScalar       *from;
1109   const PetscScalar *x;
1110 
1111   PetscFunctionBegin;
1112   /* diagonal part */
1113   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1114   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1115   PetscCall(VecZeroEntries(a->slvec1b));
1116 
1117   /* subdiagonal part */
1118   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1119   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1120 
1121   /* copy x into the vec slvec0 */
1122   PetscCall(VecGetArray(a->slvec0, &from));
1123   PetscCall(VecGetArrayRead(xx, &x));
1124   PetscCall(PetscArraycpy(from, x, bs * mbs));
1125   PetscCall(VecRestoreArray(a->slvec0, &from));
1126 
1127   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1128   PetscCall(VecRestoreArrayRead(xx, &x));
1129   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1130 
1131   /* superdiagonal part */
1132   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 #endif
1136 
1137 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1140   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1141   PetscScalar       *from;
1142   const PetscScalar *x;
1143 
1144   PetscFunctionBegin;
1145   /* diagonal part */
1146   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1147   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1148   PetscCall(VecZeroEntries(a->slvec1b));
1149 
1150   /* subdiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1152 
1153   /* copy x into the vec slvec0 */
1154   PetscCall(VecGetArray(a->slvec0, &from));
1155   PetscCall(VecGetArrayRead(xx, &x));
1156   PetscCall(PetscArraycpy(from, x, bs * mbs));
1157   PetscCall(VecRestoreArray(a->slvec0, &from));
1158 
1159   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1160   PetscCall(VecRestoreArrayRead(xx, &x));
1161   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1162 
1163   /* superdiagonal part */
1164   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 /*
1169   This only works correctly for square matrices where the subblock A->A is the
1170    diagonal block
1171 */
1172 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1173 {
1174   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1178   PetscCall(MatGetDiagonal(a->A, v));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1183 {
1184   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatScale(a->A, aa));
1188   PetscCall(MatScale(a->B, aa));
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
1192 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1193 {
1194   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1195   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1196   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1197   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1198   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1199 
1200   PetscFunctionBegin;
1201   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1202   mat->getrowactive = PETSC_TRUE;
1203 
1204   if (!mat->rowvalues && (idx || v)) {
1205     /*
1206         allocate enough space to hold information from the longest row.
1207     */
1208     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1209     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1210     PetscInt      max = 1, mbs = mat->mbs, tmp;
1211     for (i = 0; i < mbs; i++) {
1212       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1213       if (max < tmp) max = tmp;
1214     }
1215     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1216   }
1217 
1218   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1219   lrow = row - brstart; /* local row index */
1220 
1221   pvA = &vworkA;
1222   pcA = &cworkA;
1223   pvB = &vworkB;
1224   pcB = &cworkB;
1225   if (!v) {
1226     pvA = NULL;
1227     pvB = NULL;
1228   }
1229   if (!idx) {
1230     pcA = NULL;
1231     if (!v) pcB = NULL;
1232   }
1233   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1234   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1235   nztot = nzA + nzB;
1236 
1237   cmap = mat->garray;
1238   if (v || idx) {
1239     if (nztot) {
1240       /* Sort by increasing column numbers, assuming A and B already sorted */
1241       PetscInt imark = -1;
1242       if (v) {
1243         *v = v_p = mat->rowvalues;
1244         for (i = 0; i < nzB; i++) {
1245           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1246           else break;
1247         }
1248         imark = i;
1249         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1250         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1251       }
1252       if (idx) {
1253         *idx = idx_p = mat->rowindices;
1254         if (imark > -1) {
1255           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1256         } else {
1257           for (i = 0; i < nzB; i++) {
1258             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259             else break;
1260           }
1261           imark = i;
1262         }
1263         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1264         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1265       }
1266     } else {
1267       if (idx) *idx = NULL;
1268       if (v) *v = NULL;
1269     }
1270   }
1271   *nz = nztot;
1272   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1273   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
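/*
   Usage sketch (not part of this file): for SBAIJ matrices a row query must be bracketed by
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular(), and only locally owned rows may
   be requested:

     PetscCall(MatGetRowUpperTriangular(A));
     PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));   // rstart <= r < rend
     ...
     PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
     PetscCall(MatRestoreRowUpperTriangular(A));
*/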
1276 
1277 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1278 {
1279   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1280 
1281   PetscFunctionBegin;
1282   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1283   baij->getrowactive = PETSC_FALSE;
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 
1287 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1288 {
1289   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1290   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1291 
1292   PetscFunctionBegin;
1293   aA->getrow_utriangular = PETSC_TRUE;
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1297 {
1298   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1299   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1300 
1301   PetscFunctionBegin;
1302   aA->getrow_utriangular = PETSC_FALSE;
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1307 {
1308   PetscFunctionBegin;
1309   if (PetscDefined(USE_COMPLEX)) {
1310     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1311 
1312     PetscCall(MatConjugate(a->A));
1313     PetscCall(MatConjugate(a->B));
1314   }
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1319 {
1320   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatRealPart(a->A));
1324   PetscCall(MatRealPart(a->B));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1329 {
1330   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1331 
1332   PetscFunctionBegin;
1333   PetscCall(MatImaginaryPart(a->A));
1334   PetscCall(MatImaginaryPart(a->B));
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1339    Input: isrow       - distributed(parallel),
1340           iscol_local - locally owned (seq)
1341 */
1342 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1343 {
1344   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1345   const PetscInt *ptr1, *ptr2;
1346 
1347   PetscFunctionBegin;
1348   *flg = PETSC_FALSE;
1349   PetscCall(ISGetLocalSize(isrow, &sz1));
1350   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1351   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1352 
1353   PetscCall(ISGetIndices(isrow, &ptr1));
1354   PetscCall(ISGetIndices(iscol_local, &ptr2));
1355 
1356   PetscCall(PetscMalloc1(sz1, &a1));
1357   PetscCall(PetscMalloc1(sz2, &a2));
1358   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1359   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1360   PetscCall(PetscSortInt(sz1, a1));
1361   PetscCall(PetscSortInt(sz2, a2));
1362 
1363   nmatch = 0;
1364   k      = 0;
1365   for (i = 0; i < sz1; i++) {
1366     for (j = k; j < sz2; j++) {
1367       if (a1[i] == a2[j]) {
1368         k = j;
1369         nmatch++;
1370         break;
1371       }
1372     }
1373   }
1374   PetscCall(ISRestoreIndices(isrow, &ptr1));
1375   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1376   PetscCall(PetscFree(a1));
1377   PetscCall(PetscFree(a2));
1378   if (nmatch < sz1) {
1379     *flg = PETSC_FALSE;
1380   } else {
1381     *flg = PETSC_TRUE;
1382   }
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
1386 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1387 {
1388   IS       iscol_local;
1389   PetscInt csize;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(ISGetLocalSize(iscol, &csize));
1393   if (call == MAT_REUSE_MATRIX) {
1394     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1395     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1396   } else {
1397     PetscBool issorted, isequal;
1398 
1399     PetscCall(ISAllGather(iscol, &iscol_local));
1400     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1401     PetscCall(ISSorted(iscol_local, &issorted));
1402     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1403   }
1404 
1405   /* now call MatCreateSubMatrix_MPIBAIJ() */
1406   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1407   if (call == MAT_INITIAL_MATRIX) {
1408     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1409     PetscCall(ISDestroy(&iscol_local));
1410   }
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
1414 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1415 {
1416   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1417 
1418   PetscFunctionBegin;
1419   PetscCall(MatZeroEntries(l->A));
1420   PetscCall(MatZeroEntries(l->B));
1421   PetscFunctionReturn(PETSC_SUCCESS);
1422 }
1423 
1424 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1425 {
1426   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1427   Mat            A = a->A, B = a->B;
1428   PetscLogDouble isend[5], irecv[5];
1429 
1430   PetscFunctionBegin;
1431   info->block_size = (PetscReal)matin->rmap->bs;
1432 
1433   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1434 
1435   isend[0] = info->nz_used;
1436   isend[1] = info->nz_allocated;
1437   isend[2] = info->nz_unneeded;
1438   isend[3] = info->memory;
1439   isend[4] = info->mallocs;
1440 
1441   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1442 
1443   isend[0] += info->nz_used;
1444   isend[1] += info->nz_allocated;
1445   isend[2] += info->nz_unneeded;
1446   isend[3] += info->memory;
1447   isend[4] += info->mallocs;
1448   if (flag == MAT_LOCAL) {
1449     info->nz_used      = isend[0];
1450     info->nz_allocated = isend[1];
1451     info->nz_unneeded  = isend[2];
1452     info->memory       = isend[3];
1453     info->mallocs      = isend[4];
1454   } else if (flag == MAT_GLOBAL_MAX) {
1455     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1456 
1457     info->nz_used      = irecv[0];
1458     info->nz_allocated = irecv[1];
1459     info->nz_unneeded  = irecv[2];
1460     info->memory       = irecv[3];
1461     info->mallocs      = irecv[4];
1462   } else if (flag == MAT_GLOBAL_SUM) {
1463     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1464 
1465     info->nz_used      = irecv[0];
1466     info->nz_allocated = irecv[1];
1467     info->nz_unneeded  = irecv[2];
1468     info->memory       = irecv[3];
1469     info->mallocs      = irecv[4];
1470   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1471   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1472   info->fill_ratio_needed = 0;
1473   info->factor_mallocs    = 0;
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 
1477 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1478 {
1479   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1480   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1481 
1482   PetscFunctionBegin;
1483   switch (op) {
1484   case MAT_NEW_NONZERO_LOCATIONS:
1485   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1486   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1487   case MAT_KEEP_NONZERO_PATTERN:
1488   case MAT_SUBMAT_SINGLEIS:
1489   case MAT_NEW_NONZERO_LOCATION_ERR:
1490     MatCheckPreallocated(A, 1);
1491     PetscCall(MatSetOption(a->A, op, flg));
1492     PetscCall(MatSetOption(a->B, op, flg));
1493     break;
1494   case MAT_ROW_ORIENTED:
1495     MatCheckPreallocated(A, 1);
1496     a->roworiented = flg;
1497 
1498     PetscCall(MatSetOption(a->A, op, flg));
1499     PetscCall(MatSetOption(a->B, op, flg));
1500     break;
1501   case MAT_FORCE_DIAGONAL_ENTRIES:
1502   case MAT_SORTED_FULL:
1503     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1504     break;
1505   case MAT_IGNORE_OFF_PROC_ENTRIES:
1506     a->donotstash = flg;
1507     break;
1508   case MAT_USE_HASH_TABLE:
1509     a->ht_flag = flg;
1510     break;
1511   case MAT_HERMITIAN:
1512     MatCheckPreallocated(A, 1);
1513     PetscCall(MatSetOption(a->A, op, flg));
1514 #if defined(PETSC_USE_COMPLEX)
1515     if (flg) { /* need different mat-vec ops */
1516       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1517       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1518       A->ops->multtranspose    = NULL;
1519       A->ops->multtransposeadd = NULL;
1520       A->symmetric             = PETSC_BOOL3_FALSE;
1521     }
1522 #endif
1523     break;
1524   case MAT_SPD:
1525   case MAT_SYMMETRIC:
1526     MatCheckPreallocated(A, 1);
1527     PetscCall(MatSetOption(a->A, op, flg));
1528 #if defined(PETSC_USE_COMPLEX)
1529     if (flg) { /* restore to use default mat-vec ops */
1530       A->ops->mult             = MatMult_MPISBAIJ;
1531       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1532       A->ops->multtranspose    = MatMult_MPISBAIJ;
1533       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1534     }
1535 #endif
1536     break;
1537   case MAT_STRUCTURALLY_SYMMETRIC:
1538     MatCheckPreallocated(A, 1);
1539     PetscCall(MatSetOption(a->A, op, flg));
1540     break;
1541   case MAT_SYMMETRY_ETERNAL:
1542   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1543     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1544     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1545     break;
1546   case MAT_SPD_ETERNAL:
1547     break;
1548   case MAT_IGNORE_LOWER_TRIANGULAR:
1549     aA->ignore_ltriangular = flg;
1550     break;
1551   case MAT_ERROR_LOWER_TRIANGULAR:
1552     aA->ignore_ltriangular = flg;
1553     break;
1554   case MAT_GETROW_UPPERTRIANGULAR:
1555     aA->getrow_utriangular = flg;
1556     break;
1557   default:
1558     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1559   }
1560   PetscFunctionReturn(PETSC_SUCCESS);
1561 }
1562 
1563 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1564 {
1565   PetscFunctionBegin;
1566   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1567   if (reuse == MAT_INITIAL_MATRIX) {
1568     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1569   } else if (reuse == MAT_REUSE_MATRIX) {
1570     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1571   }
1572   PetscFunctionReturn(PETSC_SUCCESS);
1573 }
1574 
1575 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1576 {
1577   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1578   Mat           a = baij->A, b = baij->B;
1579   PetscInt      nv, m, n;
1580   PetscBool     flg;
1581 
1582   PetscFunctionBegin;
1583   if (ll != rr) {
1584     PetscCall(VecEqual(ll, rr, &flg));
1585     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1586   }
1587   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1588 
1589   PetscCall(MatGetLocalSize(mat, &m, &n));
1590   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1591 
1592   PetscCall(VecGetLocalSize(rr, &nv));
1593   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1594 
1595   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596 
1597   /* left diagonalscale the off-diagonal part */
1598   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1599 
1600   /* scale the diagonal part */
1601   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1602 
1603   /* right diagonalscale the off-diagonal part */
1604   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1605   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1606   PetscFunctionReturn(PETSC_SUCCESS);
1607 }
1608 
1609 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1610 {
1611   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1612 
1613   PetscFunctionBegin;
1614   PetscCall(MatSetUnfactored(a->A));
1615   PetscFunctionReturn(PETSC_SUCCESS);
1616 }
1617 
1618 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1619 
1620 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1621 {
1622   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1623   Mat           a, b, c, d;
1624   PetscBool     flg;
1625 
1626   PetscFunctionBegin;
1627   a = matA->A;
1628   b = matA->B;
1629   c = matB->A;
1630   d = matB->B;
1631 
1632   PetscCall(MatEqual(a, c, &flg));
1633   if (flg) PetscCall(MatEqual(b, d, &flg));
1634   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1635   PetscFunctionReturn(PETSC_SUCCESS);
1636 }
1637 
1638 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1639 {
1640   PetscBool isbaij;
1641 
1642   PetscFunctionBegin;
1643   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1644   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1645   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1646   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1647     PetscCall(MatGetRowUpperTriangular(A));
1648     PetscCall(MatCopy_Basic(A, B, str));
1649     PetscCall(MatRestoreRowUpperTriangular(A));
1650   } else {
1651     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1652     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1653 
1654     PetscCall(MatCopy(a->A, b->A, str));
1655     PetscCall(MatCopy(a->B, b->B, str));
1656   }
1657   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1658   PetscFunctionReturn(PETSC_SUCCESS);
1659 }
1660 
1661 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1662 {
1663   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1664   PetscBLASInt  bnz, one                          = 1;
1665   Mat_SeqSBAIJ *xa, *ya;
1666   Mat_SeqBAIJ  *xb, *yb;
1667 
1668   PetscFunctionBegin;
1669   if (str == SAME_NONZERO_PATTERN) {
1670     PetscScalar alpha = a;
1671     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1672     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1673     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1674     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1675     xb = (Mat_SeqBAIJ *)xx->B->data;
1676     yb = (Mat_SeqBAIJ *)yy->B->data;
1677     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1678     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1679     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1680   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1681     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1682     PetscCall(MatAXPY_Basic(Y, a, X, str));
1683     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1684   } else {
1685     Mat       B;
1686     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1687     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1688     PetscCall(MatGetRowUpperTriangular(X));
1689     PetscCall(MatGetRowUpperTriangular(Y));
1690     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1691     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1692     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1693     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1694     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1695     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1696     PetscCall(MatSetType(B, MATMPISBAIJ));
1697     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1698     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1699     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1700     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1701     PetscCall(MatHeaderMerge(Y, &B));
1702     PetscCall(PetscFree(nnz_d));
1703     PetscCall(PetscFree(nnz_o));
1704     PetscCall(MatRestoreRowUpperTriangular(X));
1705     PetscCall(MatRestoreRowUpperTriangular(Y));
1706   }
1707   PetscFunctionReturn(PETSC_SUCCESS);
1708 }
1709 
1710 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1711 {
1712   PetscInt  i;
1713   PetscBool flg;
1714 
1715   PetscFunctionBegin;
1716   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1717   for (i = 0; i < n; i++) {
1718     PetscCall(ISEqual(irow[i], icol[i], &flg));
1719     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1720   }
1721   PetscFunctionReturn(PETSC_SUCCESS);
1722 }
1723 
1724 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1725 {
1726   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1727   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1728 
1729   PetscFunctionBegin;
1730   if (!Y->preallocated) {
1731     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1732   } else if (!aij->nz) {
1733     PetscInt nonew = aij->nonew;
1734     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1735     aij->nonew = nonew;
1736   }
1737   PetscCall(MatShift_Basic(Y, a));
1738   PetscFunctionReturn(PETSC_SUCCESS);
1739 }
1740 
1741 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1742 {
1743   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1747   PetscCall(MatMissingDiagonal(a->A, missing, d));
1748   if (d) {
1749     PetscInt rstart;
1750     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1751     *d += rstart / A->rmap->bs;
1752   }
1753   PetscFunctionReturn(PETSC_SUCCESS);
1754 }
1755 
1756 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1757 {
1758   PetscFunctionBegin;
1759   *a = ((Mat_MPISBAIJ *)A->data)->A;
1760   PetscFunctionReturn(PETSC_SUCCESS);
1761 }
1762 
1763 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1764 {
1765   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1766 
1767   PetscFunctionBegin;
1768   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1769   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1774                                        MatGetRow_MPISBAIJ,
1775                                        MatRestoreRow_MPISBAIJ,
1776                                        MatMult_MPISBAIJ,
1777                                        /*  4*/ MatMultAdd_MPISBAIJ,
1778                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1779                                        MatMultAdd_MPISBAIJ,
1780                                        NULL,
1781                                        NULL,
1782                                        NULL,
1783                                        /* 10*/ NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        MatSOR_MPISBAIJ,
1787                                        MatTranspose_MPISBAIJ,
1788                                        /* 15*/ MatGetInfo_MPISBAIJ,
1789                                        MatEqual_MPISBAIJ,
1790                                        MatGetDiagonal_MPISBAIJ,
1791                                        MatDiagonalScale_MPISBAIJ,
1792                                        MatNorm_MPISBAIJ,
1793                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1794                                        MatAssemblyEnd_MPISBAIJ,
1795                                        MatSetOption_MPISBAIJ,
1796                                        MatZeroEntries_MPISBAIJ,
1797                                        /* 24*/ NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        /* 29*/ MatSetUp_MPI_Hash,
1803                                        NULL,
1804                                        NULL,
1805                                        MatGetDiagonalBlock_MPISBAIJ,
1806                                        NULL,
1807                                        /* 34*/ MatDuplicate_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        /* 39*/ MatAXPY_MPISBAIJ,
1813                                        MatCreateSubMatrices_MPISBAIJ,
1814                                        MatIncreaseOverlap_MPISBAIJ,
1815                                        MatGetValues_MPISBAIJ,
1816                                        MatCopy_MPISBAIJ,
1817                                        /* 44*/ NULL,
1818                                        MatScale_MPISBAIJ,
1819                                        MatShift_MPISBAIJ,
1820                                        NULL,
1821                                        NULL,
1822                                        /* 49*/ NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        /* 54*/ NULL,
1828                                        NULL,
1829                                        MatSetUnfactored_MPISBAIJ,
1830                                        NULL,
1831                                        MatSetValuesBlocked_MPISBAIJ,
1832                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1833                                        NULL,
1834                                        NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        /* 64*/ NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1843                                        NULL,
1844                                        MatConvert_MPISBAIJ_Basic,
1845                                        NULL,
1846                                        NULL,
1847                                        /* 74*/ NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        /* 79*/ NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        MatLoad_MPISBAIJ,
1857                                        /* 84*/ NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        /* 89*/ NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        /* 94*/ NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        /* 99*/ NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        MatConjugate_MPISBAIJ,
1876                                        NULL,
1877                                        /*104*/ NULL,
1878                                        MatRealPart_MPISBAIJ,
1879                                        MatImaginaryPart_MPISBAIJ,
1880                                        MatGetRowUpperTriangular_MPISBAIJ,
1881                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1882                                        /*109*/ NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        MatMissingDiagonal_MPISBAIJ,
1887                                        /*114*/ NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        /*119*/ NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        /*124*/ NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        /*129*/ NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        NULL,
1907                                        /*134*/ NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        /*139*/ MatSetBlockSizes_Default,
1913                                        NULL,
1914                                        NULL,
1915                                        NULL,
1916                                        NULL,
1917                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1918                                        NULL,
1919                                        NULL,
1920                                        NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        /*150*/ NULL,
1924                                        MatEliminateZeros_MPISBAIJ};
1925 
1926 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1927 {
1928   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1929   PetscInt      i, mbs, Mbs;
1930   PetscMPIInt   size;
1931 
1932   PetscFunctionBegin;
1933   if (B->hash_active) {
1934     B->ops[0]      = b->cops;
1935     B->hash_active = PETSC_FALSE;
1936   }
1937   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1938   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1939   PetscCall(PetscLayoutSetUp(B->rmap));
1940   PetscCall(PetscLayoutSetUp(B->cmap));
1941   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1942   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1943   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1944 
1945   mbs = B->rmap->n / bs;
1946   Mbs = B->rmap->N / bs;
1947   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1948 
1949   B->rmap->bs = bs;
1950   b->bs2      = bs * bs;
1951   b->mbs      = mbs;
1952   b->Mbs      = Mbs;
1953   b->nbs      = B->cmap->n / bs;
1954   b->Nbs      = B->cmap->N / bs;
1955 
1956   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1957   b->rstartbs = B->rmap->rstart / bs;
1958   b->rendbs   = B->rmap->rend / bs;
1959 
1960   b->cstartbs = B->cmap->rstart / bs;
1961   b->cendbs   = B->cmap->rend / bs;
1962 
1963 #if defined(PETSC_USE_CTABLE)
1964   PetscCall(PetscHMapIDestroy(&b->colmap));
1965 #else
1966   PetscCall(PetscFree(b->colmap));
1967 #endif
1968   PetscCall(PetscFree(b->garray));
1969   PetscCall(VecDestroy(&b->lvec));
1970   PetscCall(VecScatterDestroy(&b->Mvctx));
1971   PetscCall(VecDestroy(&b->slvec0));
1972   PetscCall(VecDestroy(&b->slvec0b));
1973   PetscCall(VecDestroy(&b->slvec1));
1974   PetscCall(VecDestroy(&b->slvec1a));
1975   PetscCall(VecDestroy(&b->slvec1b));
1976   PetscCall(VecScatterDestroy(&b->sMvctx));
1977 
1978   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1979   PetscCall(MatDestroy(&b->B));
1980   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1981   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1982   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1983 
1984   PetscCall(MatDestroy(&b->A));
1985   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1986   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1987   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1988 
1989   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1990   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1991 
1992   B->preallocated  = PETSC_TRUE;
1993   B->was_assembled = PETSC_FALSE;
1994   B->assembled     = PETSC_FALSE;
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1999 {
2000   PetscInt        m, rstart, cend;
2001   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2002   const PetscInt *JJ          = NULL;
2003   PetscScalar    *values      = NULL;
2004   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2005   PetscBool       nooffprocentries;
2006 
2007   PetscFunctionBegin;
2008   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2009   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2010   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2011   PetscCall(PetscLayoutSetUp(B->rmap));
2012   PetscCall(PetscLayoutSetUp(B->cmap));
2013   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2014   m      = B->rmap->n / bs;
2015   rstart = B->rmap->rstart / bs;
2016   cend   = B->cmap->rend / bs;
2017 
2018   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2019   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2020   for (i = 0; i < m; i++) {
2021     nz = ii[i + 1] - ii[i];
2022     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2023     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2024     JJ = jj + ii[i];
2025     bd = 0;
2026     for (j = 0; j < nz; j++) {
2027       if (*JJ >= i + rstart) break;
2028       JJ++;
2029       bd++;
2030     }
2031     d = 0;
2032     for (; j < nz; j++) {
2033       if (*JJ++ >= cend) break;
2034       d++;
2035     }
2036     d_nnz[i] = d;
2037     o_nnz[i] = nz - d - bd;
2038     nz       = nz - bd;
2039     nz_max   = PetscMax(nz_max, nz);
2040   }
2041   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2042   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2043   PetscCall(PetscFree2(d_nnz, o_nnz));
2044 
2045   values = (PetscScalar *)V;
2046   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2047   for (i = 0; i < m; i++) {
2048     PetscInt        row   = i + rstart;
2049     PetscInt        ncols = ii[i + 1] - ii[i];
2050     const PetscInt *icols = jj + ii[i];
2051     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2052       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2053       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2054     } else { /* block ordering does not match so we can only insert one block at a time. */
2055       PetscInt j;
2056       for (j = 0; j < ncols; j++) {
2057         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2058         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2059       }
2060     }
2061   }
2062 
2063   if (!V) PetscCall(PetscFree(values));
2064   nooffprocentries    = B->nooffprocentries;
2065   B->nooffprocentries = PETSC_TRUE;
2066   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2067   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2068   B->nooffprocentries = nooffprocentries;
2069 
2070   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /*MC
2075    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2076    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2077    the matrix is stored.
2078 
2079    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2080    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2081 
2082    Options Database Key:
2083 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2084 
2085    Level: beginner
2086 
2087    Note:
2088      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2089      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2090 
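   Example Usage:
   A minimal sketch (sizes, block size, and preallocation counts below are illustrative only) of creating an `MATMPISBAIJ` matrix and, for complex scalars, requesting Hermitian rather than symmetric behavior:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL));
   PetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE)); /* only meaningful with complex scalars */
   /* ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
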
2091 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2092 M*/
2093 
2094 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2095 {
2096   Mat_MPISBAIJ *b;
2097   PetscBool     flg = PETSC_FALSE;
2098 
2099   PetscFunctionBegin;
2100   PetscCall(PetscNew(&b));
2101   B->data   = (void *)b;
2102   B->ops[0] = MatOps_Values;
2103 
2104   B->ops->destroy = MatDestroy_MPISBAIJ;
2105   B->ops->view    = MatView_MPISBAIJ;
2106   B->assembled    = PETSC_FALSE;
2107   B->insertmode   = NOT_SET_VALUES;
2108 
2109   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2110   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2111 
2112   /* build local table of row and column ownerships */
2113   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2114 
2115   /* build cache for off array entries formed */
2116   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2117 
2118   b->donotstash  = PETSC_FALSE;
2119   b->colmap      = NULL;
2120   b->garray      = NULL;
2121   b->roworiented = PETSC_TRUE;
2122 
2123   /* stuff used in block assembly */
2124   b->barray = NULL;
2125 
2126   /* stuff used for matrix vector multiply */
2127   b->lvec    = NULL;
2128   b->Mvctx   = NULL;
2129   b->slvec0  = NULL;
2130   b->slvec0b = NULL;
2131   b->slvec1  = NULL;
2132   b->slvec1a = NULL;
2133   b->slvec1b = NULL;
2134   b->sMvctx  = NULL;
2135 
2136   /* stuff for MatGetRow() */
2137   b->rowindices   = NULL;
2138   b->rowvalues    = NULL;
2139   b->getrowactive = PETSC_FALSE;
2140 
2141   /* hash table stuff */
2142   b->ht           = NULL;
2143   b->hd           = NULL;
2144   b->ht_size      = 0;
2145   b->ht_flag      = PETSC_FALSE;
2146   b->ht_fact      = 0;
2147   b->ht_total_ct  = 0;
2148   b->ht_insert_ct = 0;
2149 
2150   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2151   b->ijonly = PETSC_FALSE;
2152 
2153   b->in_loc = NULL;
2154   b->v_loc  = NULL;
2155   b->n_loc  = 0;
2156 
2157   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2158   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2159   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2160   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2161 #if defined(PETSC_HAVE_ELEMENTAL)
2162   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2163 #endif
2164 #if defined(PETSC_HAVE_SCALAPACK)
2165   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2166 #endif
2167   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2168   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2169 
2170   B->symmetric                   = PETSC_BOOL3_TRUE;
2171   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2172   B->symmetry_eternal            = PETSC_TRUE;
2173   B->structural_symmetry_eternal = PETSC_TRUE;
2174 #if defined(PETSC_USE_COMPLEX)
2175   B->hermitian = PETSC_BOOL3_FALSE;
2176 #else
2177   B->hermitian = PETSC_BOOL3_TRUE;
2178 #endif
2179 
2180   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2181   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2182   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2183   if (flg) {
2184     PetscReal fact = 1.39;
2185     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2186     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2187     if (fact <= 1.0) fact = 1.39;
2188     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2189     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2190   }
2191   PetscOptionsEnd();
2192   PetscFunctionReturn(PETSC_SUCCESS);
2193 }
2194 
2195 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2196 /*MC
2197    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2198 
2199    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2200    and `MATMPISBAIJ` otherwise.
2201 
2202    Options Database Key:
2203 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2204 
2205   Level: beginner
2206 
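  Example Usage:
  A minimal sketch of selecting the type at runtime; the concrete type becomes `MATSEQSBAIJ` or `MATMPISBAIJ` depending on the communicator size:
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
  PetscCall(MatSetType(A, MATSBAIJ)); /* or use -mat_type sbaij with MatSetFromOptions() */
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));
.ve
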
2207 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2208 M*/
2209 
2210 /*@C
2211   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2212   the user should preallocate the matrix storage by setting the parameters
2213   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2214   performance can be increased by more than a factor of 50.
2215 
2216   Collective
2217 
2218   Input Parameters:
2219 + B     - the matrix
2220 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2221           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2222 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2223            submatrix  (same for all local rows)
2224 . d_nnz - array containing the number of block nonzeros in the various block rows
2225            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2226            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2227            for the diagonal entry and set a value even if it is zero.
2228 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2229            submatrix (same for all local rows).
2230 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2231            off-diagonal portion of the local submatrix that is right of the diagonal
2232            (possibly different for each block row) or `NULL`.
2233 
2234   Options Database Keys:
2235 + -mat_no_unroll  - uses code that does not unroll the loops in the
2236                      block calculations (much slower)
2237 - -mat_block_size - size of the blocks to use
2238 
2239   Level: intermediate
2240 
2241   Notes:
2242 
2243   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2244   then it must be used on all processors that share the object for that argument.
2245 
2246   If the *_nnz parameter is given then the *_nz parameter is ignored
2247 
2248   Storage Information:
2249   For a square global matrix we define each processor's diagonal portion
2250   to be its local rows and the corresponding columns (a square submatrix);
2251   each processor's off-diagonal portion encompasses the remainder of the
2252   local matrix (a rectangular submatrix).
2253 
2254   The user can specify preallocated storage for the diagonal part of
2255   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2256   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2257   memory allocation.  Likewise, specify preallocated storage for the
2258   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2259 
2260   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2261   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2262   You can also run with the option `-info` and look for messages with the string
2263   malloc in them to see if additional memory allocation was needed.
2264 
2265   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2266   the figure below we depict these three local rows and all columns (0-11).
2267 
2268 .vb
2269            0 1 2 3 4 5 6 7 8 9 10 11
2270           --------------------------
2271    row 3  |. . . d d d o o o o  o  o
2272    row 4  |. . . d d d o o o o  o  o
2273    row 5  |. . . d d d o o o o  o  o
2274           --------------------------
2275 .ve
2276 
2277   Thus, any entries in the d locations are stored in the d (diagonal)
2278   submatrix, and any entries in the o locations are stored in the
2279   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2280   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2281 
2282   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2283   plus the diagonal part of the d matrix,
2284   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2285 
2286   In general, for PDE problems in which most nonzeros are near the diagonal,
2287   one expects `d_nz` >> `o_nz`.
2288 
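  Example Usage:
  A minimal sketch for the 3-row layout pictured above with block size 1, assuming `B` has already been created with type `MATMPISBAIJ` and owns rows 3-5 (the counts below are illustrative, not prescriptive):
.vb
  /* per-block-row counts: upper triangle plus diagonal of the d part, all of the o part */
  PetscInt d_nnz[] = {3, 2, 1}, o_nnz[] = {6, 6, 6};
  PetscCall(MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz));
  /* or, with a single upper bound per block row: MatMPISBAIJSetPreallocation(B, 1, 3, NULL, 6, NULL) */
.ve
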
2289 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2290 @*/
2291 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2292 {
2293   PetscFunctionBegin;
2294   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2295   PetscValidType(B, 1);
2296   PetscValidLogicalCollectiveInt(B, bs, 2);
2297   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2298   PetscFunctionReturn(PETSC_SUCCESS);
2299 }
2300 
2301 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2302 /*@C
2303   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2304   (block compressed row).  For good matrix assembly performance
2305   the user should preallocate the matrix storage by setting the parameters
2306   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2307 
2308   Collective
2309 
2310   Input Parameters:
2311 + comm  - MPI communicator
2312 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2313           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2314 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2315            This value should be the same as the local size used in creating the
2316            y vector for the matrix-vector product y = Ax.
2317 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2318            This value should be the same as the local size used in creating the
2319            x vector for the matrix-vector product y = Ax.
2320 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2321 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2322 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2323            submatrix (same for all local rows)
2324 . d_nnz - array containing the number of block nonzeros in the various block rows
2325            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2326            (possibly different for each block row) or `NULL`.
2327            If you plan to factor the matrix you must leave room for the diagonal entry and
2328            set its value even if it is zero.
2329 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2330            submatrix (same for all local rows).
2331 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2332            off-diagonal portion of the local submatrix (possibly different for
2333            each block row) or `NULL`.
2334 
2335   Output Parameter:
2336 . A - the matrix
2337 
2338   Options Database Keys:
2339 + -mat_no_unroll  - uses code that does not unroll the loops in the
2340                      block calculations (much slower)
2341 . -mat_block_size - size of the blocks to use
2342 - -mat_mpi        - use the parallel matrix data structures even on one processor
2343                (defaults to using SeqSBAIJ format on one processor)
2344 
2345   Level: intermediate
2346 
2347   Notes:
2348   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2349   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2350   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2351 
2352   The number of rows and columns must be divisible by blocksize.
2353   This matrix type does not support complex Hermitian operations.
2354 
2355   The user MUST specify either the local or global matrix dimensions
2356   (possibly both).
2357 
2358   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2359   then it must be used on all processors that share the object for that argument.
2360 
2361   If the *_nnz parameter is given then the *_nz parameter is ignored
2362 
2363   Storage Information:
2364   For a square global matrix we define each processor's diagonal portion
2365   to be its local rows and the corresponding columns (a square submatrix);
2366   each processor's off-diagonal portion encompasses the remainder of the
2367   local matrix (a rectangular submatrix).
2368 
2369   The user can specify preallocated storage for the diagonal part of
2370   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2371   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2372   memory allocation. Likewise, specify preallocated storage for the
2373   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2374 
2375   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2376   the figure below we depict these three local rows and all columns (0-11).
2377 
2378 .vb
2379            0 1 2 3 4 5 6 7 8 9 10 11
2380           --------------------------
2381    row 3  |. . . d d d o o o o  o  o
2382    row 4  |. . . d d d o o o o  o  o
2383    row 5  |. . . d d d o o o o  o  o
2384           --------------------------
2385 .ve
2386 
2387   Thus, any entries in the d locations are stored in the d (diagonal)
2388   submatrix, and any entries in the o locations are stored in the
2389   o (off-diagonal) submatrix. Note that the d matrix is stored in
2390   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2391 
2392   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2393   plus the diagonal part of the d matrix,
2394   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2395   In general, for PDE problems in which most nonzeros are near the diagonal,
2396   one expects `d_nz` >> `o_nz`.
2397 
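  Example Usage:
  A minimal sketch, assuming a square 100 x 100 matrix with block size 2 and the illustrative counts `d_nz` = 5 and `o_nz` = 2:
.vb
  Mat A;
  PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A));
  /* ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd(), then use A ... */
  PetscCall(MatDestroy(&A));
.ve
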
2398 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2399 @*/
2400 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2401 {
2402   PetscMPIInt size;
2403 
2404   PetscFunctionBegin;
2405   PetscCall(MatCreate(comm, A));
2406   PetscCall(MatSetSizes(*A, m, n, M, N));
2407   PetscCallMPI(MPI_Comm_size(comm, &size));
2408   if (size > 1) {
2409     PetscCall(MatSetType(*A, MATMPISBAIJ));
2410     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2411   } else {
2412     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2413     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2414   }
2415   PetscFunctionReturn(PETSC_SUCCESS);
2416 }
2417 
2418 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2419 {
2420   Mat           mat;
2421   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2422   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2423   PetscScalar  *array;
2424 
2425   PetscFunctionBegin;
2426   *newmat = NULL;
2427 
2428   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2429   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2430   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2431   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2432   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2433 
2434   mat->factortype   = matin->factortype;
2435   mat->preallocated = PETSC_TRUE;
2436   mat->assembled    = PETSC_TRUE;
2437   mat->insertmode   = NOT_SET_VALUES;
2438 
2439   a      = (Mat_MPISBAIJ *)mat->data;
2440   a->bs2 = oldmat->bs2;
2441   a->mbs = oldmat->mbs;
2442   a->nbs = oldmat->nbs;
2443   a->Mbs = oldmat->Mbs;
2444   a->Nbs = oldmat->Nbs;
2445 
2446   a->size         = oldmat->size;
2447   a->rank         = oldmat->rank;
2448   a->donotstash   = oldmat->donotstash;
2449   a->roworiented  = oldmat->roworiented;
2450   a->rowindices   = NULL;
2451   a->rowvalues    = NULL;
2452   a->getrowactive = PETSC_FALSE;
2453   a->barray       = NULL;
2454   a->rstartbs     = oldmat->rstartbs;
2455   a->rendbs       = oldmat->rendbs;
2456   a->cstartbs     = oldmat->cstartbs;
2457   a->cendbs       = oldmat->cendbs;
2458 
2459   /* hash table stuff */
2460   a->ht           = NULL;
2461   a->hd           = NULL;
2462   a->ht_size      = 0;
2463   a->ht_flag      = oldmat->ht_flag;
2464   a->ht_fact      = oldmat->ht_fact;
2465   a->ht_total_ct  = 0;
2466   a->ht_insert_ct = 0;
2467 
2468   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2469   if (oldmat->colmap) {
2470 #if defined(PETSC_USE_CTABLE)
2471     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2472 #else
2473     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2474     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2475 #endif
2476   } else a->colmap = NULL;
2477 
2478   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2479     PetscCall(PetscMalloc1(len, &a->garray));
2480     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2481   } else a->garray = NULL;
2482 
2483   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2484   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2485   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2486 
2487   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2488   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2489 
2490   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2491   PetscCall(VecGetArray(a->slvec1, &array));
2492   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2493   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2494   PetscCall(VecRestoreArray(a->slvec1, &array));
2495   PetscCall(VecGetArray(a->slvec0, &array));
2496   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2497   PetscCall(VecRestoreArray(a->slvec0, &array));
2498 
2499   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2500   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2501   a->sMvctx = oldmat->sMvctx;
2502 
2503   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2504   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2505   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2506   *newmat = mat;
2507   PetscFunctionReturn(PETSC_SUCCESS);
2508 }
2509 
2510 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2511 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2512 
2513 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2514 {
2515   PetscBool isbinary;
2516 
2517   PetscFunctionBegin;
2518   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2519   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2520   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2521   PetscFunctionReturn(PETSC_SUCCESS);
2522 }
2523 
2524 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2525 {
2526   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2527   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2528   PetscReal     atmp;
2529   PetscReal    *work, *svalues, *rvalues;
2530   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2531   PetscMPIInt   rank, size;
2532   PetscInt     *rowners_bs, dest, count, source;
2533   PetscScalar  *va;
2534   MatScalar    *ba;
2535   MPI_Status    stat;
2536 
2537   PetscFunctionBegin;
2538   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2539   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2540   PetscCall(VecGetArray(v, &va));
2541 
2542   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2543   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2544 
2545   bs  = A->rmap->bs;
2546   mbs = a->mbs;
2547   Mbs = a->Mbs;
2548   ba  = b->a;
2549   bi  = b->i;
2550   bj  = b->j;
2551 
2552   /* find ownerships */
2553   rowners_bs = A->rmap->range;
2554 
2555   /* each proc creates an array to be distributed */
2556   PetscCall(PetscCalloc1(bs * Mbs, &work));
2557 
2558   /* row_max for B */
2559   if (rank != size - 1) {
2560     for (i = 0; i < mbs; i++) {
2561       ncols = bi[1] - bi[0];
2562       bi++;
2563       brow = bs * i;
2564       for (j = 0; j < ncols; j++) {
2565         bcol = bs * (*bj);
2566         for (kcol = 0; kcol < bs; kcol++) {
2567           col = bcol + kcol;           /* local col index */
2568           col += rowners_bs[rank + 1]; /* global col index */
2569           for (krow = 0; krow < bs; krow++) {
2570             atmp = PetscAbsScalar(*ba);
2571             ba++;
2572             row = brow + krow; /* local row index */
2573             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2574             if (work[col] < atmp) work[col] = atmp;
2575           }
2576         }
2577         bj++;
2578       }
2579     }
2580 
2581     /* send values to its owners */
2582     for (dest = rank + 1; dest < size; dest++) {
2583       svalues = work + rowners_bs[dest];
2584       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2585       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2586     }
2587   }
2588 
2589   /* receive values */
2590   if (rank) {
2591     rvalues = work;
2592     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2593     for (source = 0; source < rank; source++) {
2594       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2595       /* process values */
2596       for (i = 0; i < count; i++) {
2597         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2598       }
2599     }
2600   }
2601 
2602   PetscCall(VecRestoreArray(v, &va));
2603   PetscCall(PetscFree(work));
2604   PetscFunctionReturn(PETSC_SUCCESS);
2605 }
2606 
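/*
   SOR/SSOR relaxation for MATMPISBAIJ; only block size 1 is supported.  For the local symmetric sweeps
   and the Eisenstat variant, the contribution of the off-diagonal block B (and, by symmetry, B^T) is
   moved to the right-hand side through the slvec0 -> slvec1 scatter before the local sweep is applied
   to the diagonal block mat->A.
*/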
2607 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2608 {
2609   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2610   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2611   PetscScalar       *x, *ptr, *from;
2612   Vec                bb1;
2613   const PetscScalar *b;
2614 
2615   PetscFunctionBegin;
2616   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2617   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2618 
2619   if (flag == SOR_APPLY_UPPER) {
2620     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2621     PetscFunctionReturn(PETSC_SUCCESS);
2622   }
2623 
2624   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2625     if (flag & SOR_ZERO_INITIAL_GUESS) {
2626       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2627       its--;
2628     }
2629 
2630     PetscCall(VecDuplicate(bb, &bb1));
2631     while (its--) {
2632       /* lower triangular part: slvec0b = - B^T*xx */
2633       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2634 
2635       /* copy xx into slvec0a */
2636       PetscCall(VecGetArray(mat->slvec0, &ptr));
2637       PetscCall(VecGetArray(xx, &x));
2638       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2639       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2640 
2641       PetscCall(VecScale(mat->slvec0, -1.0));
2642 
2643       /* copy bb into slvec1a */
2644       PetscCall(VecGetArray(mat->slvec1, &ptr));
2645       PetscCall(VecGetArrayRead(bb, &b));
2646       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2647       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2648 
2649       /* set slvec1b = 0 */
2650       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2651       PetscCall(VecZeroEntries(mat->slvec1b));
2652 
2653       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2654       PetscCall(VecRestoreArray(xx, &x));
2655       PetscCall(VecRestoreArrayRead(bb, &b));
2656       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2657 
2658       /* upper triangular part: bb1 = bb1 - B*x */
2659       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2660 
2661       /* local diagonal sweep */
2662       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2663     }
2664     PetscCall(VecDestroy(&bb1));
2665   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2666     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2667   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2668     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2669   } else if (flag & SOR_EISENSTAT) {
2670     Vec                xx1;
2671     PetscBool          hasop;
2672     const PetscScalar *diag;
2673     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2674     PetscInt           i, n;
2675 
2676     if (!mat->xx1) {
2677       PetscCall(VecDuplicate(bb, &mat->xx1));
2678       PetscCall(VecDuplicate(bb, &mat->bb1));
2679     }
2680     xx1 = mat->xx1;
2681     bb1 = mat->bb1;
2682 
2683     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2684 
2685     if (!mat->diag) {
2686       /* this is wrong if the same matrix is reused with new nonzero values, since the cached diagonal is not recomputed */
2687       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2688       PetscCall(MatGetDiagonal(matin, mat->diag));
2689     }
2690     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2691 
2692     if (hasop) {
2693       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2694       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2695     } else {
2696       /*
2697           These two lines are replaced by code that may be a bit faster for a good compiler
2698       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2699       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2700       */
2701       PetscCall(VecGetArray(mat->slvec1a, &sl));
2702       PetscCall(VecGetArrayRead(mat->diag, &diag));
2703       PetscCall(VecGetArrayRead(bb, &b));
2704       PetscCall(VecGetArray(xx, &x));
2705       PetscCall(VecGetLocalSize(xx, &n));
2706       if (omega == 1.0) {
2707         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2708         PetscCall(PetscLogFlops(2.0 * n));
2709       } else {
2710         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2711         PetscCall(PetscLogFlops(3.0 * n));
2712       }
2713       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2714       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2715       PetscCall(VecRestoreArrayRead(bb, &b));
2716       PetscCall(VecRestoreArray(xx, &x));
2717     }
2718 
2719     /* multiply off-diagonal portion of matrix */
2720     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2721     PetscCall(VecZeroEntries(mat->slvec1b));
2722     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2723     PetscCall(VecGetArray(mat->slvec0, &from));
2724     PetscCall(VecGetArray(xx, &x));
2725     PetscCall(PetscArraycpy(from, x, bs * mbs));
2726     PetscCall(VecRestoreArray(mat->slvec0, &from));
2727     PetscCall(VecRestoreArray(xx, &x));
2728     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2729     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2730     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2731 
2732     /* local sweep */
2733     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2734     PetscCall(VecAXPY(xx, 1.0, xx1));
2735   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2736   PetscFunctionReturn(PETSC_SUCCESS);
2737 }
2738 
2739 /*@
2740   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard
2741   CSR format.
2742 
2743   Collective
2744 
2745   Input Parameters:
2746 + comm - MPI communicator
2747 . bs   - the block size; only a block size of 1 is supported
2748 . m    - number of local rows (cannot be `PETSC_DECIDE`)
2749 . n    - This value should be the same as the local size used in creating the
2750        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2751        calculated if `N` is given). For square matrices `n` is almost always `m`.
2752 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2753 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2754 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in that block row of the matrix
2755 . j    - column indices
2756 - a    - matrix values
2757 
2758   Output Parameter:
2759 . mat - the matrix
2760 
2761   Level: intermediate
2762 
2763   Notes:
2764   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2765   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2766   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2767 
2768   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
2769 
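  Example Usage:
  A minimal sketch (values are illustrative only) for the upper triangle of a 4 x 4 symmetric
  tridiagonal matrix with bs = 1, distributed over two MPI processes that own two rows each:
.vb
   Process 0 (rows 0-1)          Process 1 (rows 2-3)
     i[] = {0, 2, 4}               i[] = {0, 2, 3}
     j[] = {0, 1, 1, 2}            j[] = {2, 3, 3}
     a[] = {2, -1, 2, -1}          a[] = {2, -1, 2}

   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 4, 4, i, j, a, &A);
.ve
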
2770 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2771           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2772 @*/
2773 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2774 {
2775   PetscFunctionBegin;
2776   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2777   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
2778   PetscCall(MatCreate(comm, mat));
2779   PetscCall(MatSetSizes(*mat, m, n, M, N));
2780   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2781   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2782   PetscFunctionReturn(PETSC_SUCCESS);
2783 }
2784 
2785 /*@C
2786   MatMPISBAIJSetPreallocationCSR - Sets the nonzero structure and (optionally) the numerical values of a sparse parallel matrix in `MATMPISBAIJ` format
2787 
2788   Collective
2789 
2790   Input Parameters:
2791 + B  - the matrix
2792 . bs - the block size
2793 . i  - the indices into `j` for the start of each local row (starts with zero)
2794 . j  - the column indices for each local row (starts with zero); these must be sorted within each row
2795 - v  - optional values in the matrix
2796 
2797   Level: advanced
2798 
2799   Notes:
2800   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2801   and usually the numerical values as well.
2802 
2803   Any entries below the diagonal are ignored.
2804 
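  Example Usage:
  A minimal sketch, assuming the communicator `comm`, local sizes `m` and `n` (multiples of `bs`), the block
  size `bs`, and the CSR arrays `i`, `j` (global block column indices, upper triangle only), and `v` are
  already defined:
.vb
   Mat B;

   MatCreate(comm, &B);
   MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(B, MATMPISBAIJ);
   MatMPISBAIJSetPreallocationCSR(B, bs, i, j, v);
.ve
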
2805 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2806 @*/
2807 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2808 {
2809   PetscFunctionBegin;
2810   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2811   PetscFunctionReturn(PETSC_SUCCESS);
2812 }
2813 
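/*
   Concatenates the sequential SBAIJ matrix inmat supplied by each process into the parallel MATMPISBAIJ
   matrix *outmat, each process contributing its inmat as a contiguous block of rows.  With
   MAT_INITIAL_MATRIX a symbolic (preallocation) phase is performed first; with MAT_REUSE_MATRIX only
   the numerical values are inserted.
*/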
2814 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2815 {
2816   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2817   PetscInt    *indx;
2818   PetscScalar *values;
2819 
2820   PetscFunctionBegin;
2821   PetscCall(MatGetSize(inmat, &m, &N));
2822   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2823     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2824     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2825     PetscInt     *bindx, rmax = a->rmax, j;
2826     PetscMPIInt   rank, size;
2827 
2828     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2829     mbs = m / bs;
2830     Nbs = N / cbs;
2831     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2832     nbs = n / cbs;
2833 
2834     PetscCall(PetscMalloc1(rmax, &bindx));
2835     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2836 
2837     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2838     PetscCallMPI(MPI_Comm_size(comm, &size));
2839     if (rank == size - 1) {
2840       /* Check sum(nbs) = Nbs */
2841       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2842     }
2843 
2844     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2845     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2846     for (i = 0; i < mbs; i++) {
2847       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2848       nnz = nnz / bs;
2849       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2850       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2851       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2852     }
2853     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2854     PetscCall(PetscFree(bindx));
2855 
2856     PetscCall(MatCreate(comm, outmat));
2857     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2858     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2859     PetscCall(MatSetType(*outmat, MATSBAIJ));
2860     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2861     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2862     MatPreallocateEnd(dnz, onz);
2863   }
2864 
2865   /* numeric phase */
2866   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2867   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2868 
2869   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2870   for (i = 0; i < m; i++) {
2871     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2872     Ii = i + rstart;
2873     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2874     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2875   }
2876   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2877   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2878   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2879   PetscFunctionReturn(PETSC_SUCCESS);
2880 }
2881