xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f1957bc3d7b66ed8a0ce45d1acff8920f4c07d7b)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
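/* A minimal sketch (added for illustration, not part of the original source) of
   the two-pass MATPREALLOCATOR idiom the helper above builds on: insertions
   into the preallocator record only the nonzero pattern, which
   MatPreallocatorPreallocate() then transfers to the real matrix. The function
   name and the diagonal-only pattern below are assumptions for the example. */
#if 0
static PetscErrorCode ExamplePreallocateThenFill(Mat B)
{
  Mat      prealloc;
  PetscInt r, rstart, rend, m, n, M, N;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &prealloc));
  PetscCall(MatSetType(prealloc, MATPREALLOCATOR));
  PetscCall(MatSetSizes(prealloc, m, n, M, N));
  PetscCall(MatSetUp(prealloc));
  PetscCall(MatGetOwnershipRange(prealloc, &rstart, &rend));
  for (r = rstart; r < rend; r++) {
    PetscScalar v = 1.0;
    PetscCall(MatSetValues(prealloc, 1, &r, 1, &r, &v, INSERT_VALUES)); /* records the pattern only */
  }
  PetscCall(MatAssemblyBegin(prealloc, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(prealloc, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(prealloc, PETSC_TRUE, B)); /* fill=PETSC_TRUE also copies the stashed values into B */
  PetscCall(MatDestroy(&prealloc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif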
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
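/* Hedged usage sketch (illustrative, not from the original source): this
   fallback conversion is what ultimately serves a call such as the one below,
   expanding the symmetric storage into a full nonsymmetric format. A is
   assumed to be an assembled MPISBAIJ matrix. */
#if 0
Mat Aij;
PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &Aij));
/* ... use Aij, which now stores both triangles explicitly ... */
PetscCall(MatDestroy(&Aij));
#endif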
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
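/* Illustrative sketch of the store/retrieve cycle these two hooks implement
   (only the PETSc calls below are real API; the scenario is an assumption).
   MatStoreValues() requires a frozen nonzero pattern. */
#if 0
PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
PetscCall(MatStoreValues(mat));    /* snapshot the values of the A and B parts */
/* ... perturb the values, e.g. try a shifted factorization ... */
PetscCall(MatRetrieveValues(mat)); /* restore the snapshot */
#endif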
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237   a_noinsert:; \
238     ailen[brow] = nrow; \
239   } while (0)
240 
241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
242   do { \
243     brow = row / bs; \
244     rp   = bj + bi[brow]; \
245     ap   = ba + bs2 * bi[brow]; \
246     rmax = bimax[brow]; \
247     nrow = bilen[brow]; \
248     bcol = col / bs; \
249     ridx = row % bs; \
250     cidx = col % bs; \
251     low  = 0; \
252     high = nrow; \
253     while (high - low > 3) { \
254       t = (low + high) / 2; \
255       if (rp[t] > bcol) high = t; \
256       else low = t; \
257     } \
258     for (_i = low; _i < high; _i++) { \
259       if (rp[_i] > bcol) break; \
260       if (rp[_i] == bcol) { \
261         bap = ap + bs2 * _i + bs * cidx + ridx; \
262         if (addv == ADD_VALUES) *bap += value; \
263         else *bap = value; \
264         goto b_noinsert; \
265       } \
266     } \
267     if (b->nonew == 1) goto b_noinsert; \
268     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
269     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
270     N = nrow++ - 1; \
271     /* shift up all the later entries in this row */ \
272     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
273     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
274     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
275     rp[_i]                          = bcol; \
276     ap[bs2 * _i + bs * cidx + ridx] = value; \
277   b_noinsert:; \
278     bilen[brow] = nrow; \
279   } while (0)
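/* Note on the two macros above (added commentary): they locate block
   (row/bs, col/bs) in the row's sorted block-column list with a bisection
   that narrows the window to at most three candidates, then a short linear
   scan. A miss either reallocates via MatSeqXAIJReallocateAIJ() or is
   rejected, depending on ->nonew. Within each bs x bs block the scalar
   (ridx, cidx) sits at column-major offset bs*cidx + ridx; e.g. with bs = 2,
   entry (1, 0) of a block occupies offset 1 of its bs2 = 4 scalars. */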
280 
281 /* Only add/insert a(i,j) with i<=j (blocks).
282    Any a(i,j) with i>j input by the user is ignored or generates an error
283 */
284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
285 {
286   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
287   MatScalar     value;
288   PetscBool     roworiented = baij->roworiented;
289   PetscInt      i, j, row, col;
290   PetscInt      rstart_orig = mat->rmap->rstart;
291   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
292   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
293 
294   /* some variables required by the macros above */
295   Mat           A     = baij->A;
296   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
297   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
298   MatScalar    *aa = a->a;
299 
300   Mat          B     = baij->B;
301   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
302   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
303   MatScalar   *ba = b->a;
304 
305   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
306   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
307   MatScalar *ap, *bap;
308 
309   /* for stash */
310   PetscInt   n_loc, *in_loc = NULL;
311   MatScalar *v_loc = NULL;
312 
313   PetscFunctionBegin;
314   if (!baij->donotstash) {
315     if (n > baij->n_loc) {
316       PetscCall(PetscFree(baij->in_loc));
317       PetscCall(PetscFree(baij->v_loc));
318       PetscCall(PetscMalloc1(n, &baij->in_loc));
319       PetscCall(PetscMalloc1(n, &baij->v_loc));
320 
321       baij->n_loc = n;
322     }
323     in_loc = baij->in_loc;
324     v_loc  = baij->v_loc;
325   }
326 
327   for (i = 0; i < m; i++) {
328     if (im[i] < 0) continue;
329     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
330     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
331       row = im[i] - rstart_orig;                     /* local row index */
332       for (j = 0; j < n; j++) {
333         if (im[i] / bs > in[j] / bs) {
334           PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
335           continue; /* ignore lower triangular blocks */
336         }
337         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
338           col  = in[j] - cstart_orig;                    /* local col index */
339           brow = row / bs;
340           bcol = col / bs;
341           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
342           if (roworiented) value = v[i * n + j];
343           else value = v[i + j * m];
344           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
345           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
346         } else if (in[j] < 0) {
347           continue;
348         } else {
349           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
350           /* off-diag entry (B) */
351           if (mat->was_assembled) {
352             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
353 #if defined(PETSC_USE_CTABLE)
354             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
355             col = col - 1;
356 #else
357             col = baij->colmap[in[j] / bs] - 1;
358 #endif
359             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
360               PetscCall(MatDisAssemble_MPISBAIJ(mat));
361               col = in[j];
362             /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
363               B     = baij->B;
364               b     = (Mat_SeqBAIJ *)B->data;
365               bimax = b->imax;
366               bi    = b->i;
367               bilen = b->ilen;
368               bj    = b->j;
369               ba    = b->a;
370             } else col += in[j] % bs;
371           } else col = in[j];
372           if (roworiented) value = v[i * n + j];
373           else value = v[i + j * m];
374           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
375           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
376         }
377       }
378     } else { /* off processor entry */
379       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
380       if (!baij->donotstash) {
381         mat->assembled = PETSC_FALSE;
382         n_loc          = 0;
383         for (j = 0; j < n; j++) {
384           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
385           in_loc[n_loc] = in[j];
386           if (roworiented) {
387             v_loc[n_loc] = v[i * n + j];
388           } else {
389             v_loc[n_loc] = v[j * m + i];
390           }
391           n_loc++;
392         }
393         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
394       }
395     }
396   }
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
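/* Hedged usage sketch (illustrative; indices and value are assumptions): only
   entries whose block row does not exceed their block column may be set on an
   SBAIJ matrix, and lower-triangular input can be silently dropped by enabling
   the option below instead of triggering the error above. */
#if 0
PetscInt    i = 0, j = 3;
PetscScalar v = 1.0;
PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
PetscCall(MatSetValue(mat, i, j, v, ADD_VALUES)); /* kept only if i/bs <= j/bs */
#endif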
399 
400 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
401 {
402   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
403   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
404   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
405   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
406   PetscBool          roworiented = a->roworiented;
407   const PetscScalar *value       = v;
408   MatScalar         *ap, *aa = a->a, *bap;
409 
410   PetscFunctionBegin;
411   if (col < row) {
412     PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
413     PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
414   }
415   rp    = aj + ai[row];
416   ap    = aa + bs2 * ai[row];
417   rmax  = imax[row];
418   nrow  = ailen[row];
419   value = v;
420   low   = 0;
421   high  = nrow;
422 
423   while (high - low > 7) {
424     t = (low + high) / 2;
425     if (rp[t] > col) high = t;
426     else low = t;
427   }
428   for (i = low; i < high; i++) {
429     if (rp[i] > col) break;
430     if (rp[i] == col) {
431       bap = ap + bs2 * i;
432       if (roworiented) {
433         if (is == ADD_VALUES) {
434           for (ii = 0; ii < bs; ii++) {
435             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
436           }
437         } else {
438           for (ii = 0; ii < bs; ii++) {
439             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
440           }
441         }
442       } else {
443         if (is == ADD_VALUES) {
444           for (ii = 0; ii < bs; ii++) {
445             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
446           }
447         } else {
448           for (ii = 0; ii < bs; ii++) {
449             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
450           }
451         }
452       }
453       goto noinsert2;
454     }
455   }
456   if (nonew == 1) goto noinsert2;
457   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
458   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
459   N = nrow++ - 1;
460   high++;
461   /* shift up all the later entries in this row */
462   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
463   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
464   rp[i] = col;
465   bap   = ap + bs2 * i;
466   if (roworiented) {
467     for (ii = 0; ii < bs; ii++) {
468       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
469     }
470   } else {
471     for (ii = 0; ii < bs; ii++) {
472       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
473     }
474   }
475 noinsert2:;
476   ailen[row] = nrow;
477   PetscFunctionReturn(PETSC_SUCCESS);
478 }
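/* Note on the copy loops above (added commentary): v arrives row-major when
   roworiented and column-major otherwise, while blocks are stored
   column-major; hence the row-oriented branches write with stride bs
   (bap[jj], jj += bs) and the column-oriented branches stream straight
   through with *bap++. */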
479 
480 /*
481    This routine is exactly duplicated in mpibaij.c
482 */
483 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
484 {
485   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
486   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
487   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
488   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
489   PetscBool          roworiented = a->roworiented;
490   const PetscScalar *value       = v;
491   MatScalar         *ap, *aa = a->a, *bap;
492 
493   PetscFunctionBegin;
494   rp    = aj + ai[row];
495   ap    = aa + bs2 * ai[row];
496   rmax  = imax[row];
497   nrow  = ailen[row];
498   low   = 0;
499   high  = nrow;
500   value = v;
501   while (high - low > 7) {
502     t = (low + high) / 2;
503     if (rp[t] > col) high = t;
504     else low = t;
505   }
506   for (i = low; i < high; i++) {
507     if (rp[i] > col) break;
508     if (rp[i] == col) {
509       bap = ap + bs2 * i;
510       if (roworiented) {
511         if (is == ADD_VALUES) {
512           for (ii = 0; ii < bs; ii++) {
513             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
514           }
515         } else {
516           for (ii = 0; ii < bs; ii++) {
517             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
518           }
519         }
520       } else {
521         if (is == ADD_VALUES) {
522           for (ii = 0; ii < bs; ii++, value += bs) {
523             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
524             bap += bs;
525           }
526         } else {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
529             bap += bs;
530           }
531         }
532       }
533       goto noinsert2;
534     }
535   }
536   if (nonew == 1) goto noinsert2;
537   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
538   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
539   N = nrow++ - 1;
540   high++;
541   /* shift up all the later entries in this row */
542   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
543   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
544   rp[i] = col;
545   bap   = ap + bs2 * i;
546   if (roworiented) {
547     for (ii = 0; ii < bs; ii++) {
548       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
549     }
550   } else {
551     for (ii = 0; ii < bs; ii++) {
552       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
553     }
554   }
555 noinsert2:;
556   ailen[row] = nrow;
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 /*
561     This routine could be optimized by removing the need for the block copy below and passing stride information
562   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
563 */
564 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
565 {
566   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
567   const MatScalar *value;
568   MatScalar       *barray      = baij->barray;
569   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
570   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
571   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
572   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
573 
574   PetscFunctionBegin;
575   if (!barray) {
576     PetscCall(PetscMalloc1(bs2, &barray));
577     baij->barray = barray;
578   }
579 
580   if (roworiented) {
581     stepval = (n - 1) * bs;
582   } else {
583     stepval = (m - 1) * bs;
584   }
585   for (i = 0; i < m; i++) {
586     if (im[i] < 0) continue;
587     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
588     if (im[i] >= rstart && im[i] < rend) {
589       row = im[i] - rstart;
590       for (j = 0; j < n; j++) {
591         if (im[i] > in[j]) {
592           PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
593           continue; /* ignore lower triangular blocks */
594         }
595         /* if n == 1 (row-oriented) or m == 1 (column-oriented), the input block is already contiguous and no copy is required */
596         if (roworiented && n == 1) {
597           barray = (MatScalar *)v + i * bs2;
598         } else if ((!roworiented) && (m == 1)) {
599           barray = (MatScalar *)v + j * bs2;
600         } else { /* Here a copy is required */
601           if (roworiented) {
602             value = v + i * (stepval + bs) * bs + j * bs;
603           } else {
604             value = v + j * (stepval + bs) * bs + i * bs;
605           }
606           for (ii = 0; ii < bs; ii++, value += stepval) {
607             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
608           }
609           barray -= bs2;
610         }
611 
612         if (in[j] >= cstart && in[j] < cend) {
613           col = in[j] - cstart;
614           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
615         } else if (in[j] < 0) {
616           continue;
617         } else {
618           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
619           if (mat->was_assembled) {
620             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
621 
622 #if defined(PETSC_USE_CTABLE)
623             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
624             col = col < 1 ? -1 : (col - 1) / bs;
625 #else
626             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
627 #endif
628             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
629               PetscCall(MatDisAssemble_MPISBAIJ(mat));
630               col = in[j];
631             }
632           } else col = in[j];
633           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
634         }
635       }
636     } else {
637       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
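/* Illustrative sketch (indices and values are assumptions): inserting one
   bs x bs block by global block indices, here bs = 2 and block (0, 1). */
#if 0
PetscInt    ib     = 0, jb = 1;
PetscScalar blk[4] = {1.0, 2.0, 3.0, 4.0}; /* interpreted per the MAT_ROW_ORIENTED option */
PetscCall(MatSetValuesBlocked(mat, 1, &ib, 1, &jb, blk, INSERT_VALUES));
#endif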
649 
650 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
651 {
652   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
653   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
654   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
655 
656   PetscFunctionBegin;
657   for (i = 0; i < m; i++) {
658     if (idxm[i] < 0) continue; /* negative row */
659     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
660     PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
661     row = idxm[i] - bsrstart;
662     for (j = 0; j < n; j++) {
663       if (idxn[j] < 0) continue; /* negative column */
664       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
665       if (idxn[j] >= bscstart && idxn[j] < bscend) {
666         col = idxn[j] - bscstart;
667         PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
668       } else {
669         if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
670 #if defined(PETSC_USE_CTABLE)
671         PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
672         data--;
673 #else
674         data = baij->colmap[idxn[j] / bs] - 1;
675 #endif
676         if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
677         else {
678           col = data + idxn[j] % bs;
679           PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
680         }
681       }
682     }
683   }
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
688 {
689   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
690   PetscReal     sum[2], *lnorm2;
691 
692   PetscFunctionBegin;
693   if (baij->size == 1) {
694     PetscCall(MatNorm(baij->A, type, norm));
695   } else {
696     if (type == NORM_FROBENIUS) {
697       PetscCall(PetscMalloc1(2, &lnorm2));
698       PetscCall(MatNorm(baij->A, type, lnorm2));
699       *lnorm2 = (*lnorm2) * (*lnorm2);
700       lnorm2++; /* lnorm2[0] now holds the squared norm of A; advance to the slot for B */
701       PetscCall(MatNorm(baij->B, type, lnorm2));
702       *lnorm2 = (*lnorm2) * (*lnorm2);
703       lnorm2--; /* lnorm2[1] now holds the squared norm of B; rewind to the start */
704       PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
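      /* B stores each off-diagonal block once although it contributes to both (i,j) and (j,i) of the symmetric matrix, hence the factor 2 below */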
705       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
706       PetscCall(PetscFree(lnorm2));
707     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum; the two norms coincide for a symmetric matrix */
708       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
709       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
710       PetscReal    *rsum, vabs;
711       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
712       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
713       MatScalar    *v;
714 
715       PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
716       /* Amat */
717       v  = amat->a;
718       jj = amat->j;
719       for (brow = 0; brow < mbs; brow++) {
720         grow = bs * (rstart + brow);
721         nz   = amat->i[brow + 1] - amat->i[brow];
722         for (bcol = 0; bcol < nz; bcol++) {
723           gcol = bs * (rstart + *jj);
724           jj++;
725           for (col = 0; col < bs; col++) {
726             for (row = 0; row < bs; row++) {
727               vabs = PetscAbsScalar(*v);
728               v++;
729               rsum[gcol + col] += vabs;
730               /* non-diagonal block */
731               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
732             }
733           }
734         }
735         PetscCall(PetscLogFlops(nz * bs * bs));
736       }
737       /* Bmat */
738       v  = bmat->a;
739       jj = bmat->j;
740       for (brow = 0; brow < mbs; brow++) {
741         grow = bs * (rstart + brow);
742         nz   = bmat->i[brow + 1] - bmat->i[brow];
743         for (bcol = 0; bcol < nz; bcol++) {
744           gcol = bs * garray[*jj];
745           jj++;
746           for (col = 0; col < bs; col++) {
747             for (row = 0; row < bs; row++) {
748               vabs = PetscAbsScalar(*v);
749               v++;
750               rsum[gcol + col] += vabs;
751               rsum[grow + row] += vabs;
752             }
753           }
754         }
755         PetscCall(PetscLogFlops(nz * bs * bs));
756       }
757       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
758       *norm = 0.0;
759       for (col = 0; col < mat->cmap->N; col++) {
760         if (rsum[col] > *norm) *norm = rsum[col];
761       }
762       PetscCall(PetscFree(rsum));
763     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
764   }
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
769 {
770   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
771   PetscInt      nstash, reallocs;
772 
773   PetscFunctionBegin;
774   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
775 
776   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
777   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
778   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
779   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
780   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
781   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
782   PetscFunctionReturn(PETSC_SUCCESS);
783 }
784 
785 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
786 {
787   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
788   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
789   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
790   PetscInt     *row, *col;
791   PetscBool     all_assembled;
792   PetscMPIInt   n;
793   PetscBool     r1, r2, r3;
794   MatScalar    *val;
795 
796   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
797   PetscFunctionBegin;
798   if (!baij->donotstash && !mat->nooffprocentries) {
799     while (1) {
800       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
801       if (!flg) break;
802 
803       for (i = 0; i < n;) {
804         /* Now identify the consecutive vals belonging to the same row */
805         for (j = i, rstart = row[j]; j < n; j++) {
806           if (row[j] != rstart) break;
807         }
808         if (j < n) ncols = j - i;
809         else ncols = n - i;
810         /* Now assemble all these values with a single function call */
811         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
812         i = j;
813       }
814     }
815     PetscCall(MatStashScatterEnd_Private(&mat->stash));
816     /* Now process the block-stash. Since the values are stashed column-oriented,
817        clear the row-oriented flags before calling MatSetValuesBlocked_MPISBAIJ(), and
818        afterwards restore the original flags */
819     r1 = baij->roworiented;
820     r2 = a->roworiented;
821     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
822 
823     baij->roworiented = PETSC_FALSE;
824     a->roworiented    = PETSC_FALSE;
825 
826     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
827     while (1) {
828       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
829       if (!flg) break;
830 
831       for (i = 0; i < n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j = i, rstart = row[j]; j < n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j - i;
837         else ncols = n - i;
838         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
839         i = j;
840       }
841     }
842     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
843 
844     baij->roworiented = r1;
845     a->roworiented    = r2;
846 
847     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
848   }
849 
850   PetscCall(MatAssemblyBegin(baij->A, mode));
851   PetscCall(MatAssemblyEnd(baij->A, mode));
852 
853   /* determine if any process has disassembled; if so, we must also
854      disassemble ourselves so that we can reassemble */
855   /*
856      if the nonzero structure of the submatrix B cannot change, then no
857      process disassembled and we can skip this step
858   */
859   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
860     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
861     if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
862   }
863 
864   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
865   PetscCall(MatAssemblyBegin(baij->B, mode));
866   PetscCall(MatAssemblyEnd(baij->B, mode));
867 
868   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
869 
870   baij->rowvalues = NULL;
871 
872   /* if no new nonzero locations are allowed in the matrix then only update the matrix nonzero state the first time through */
873   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
874     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
875     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
876   }
877   PetscFunctionReturn(PETSC_SUCCESS);
878 }
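/* Summary of the assembly above (added commentary): the scalar and block
   stashes deliver the off-process entries whose communication was started in
   MatAssemblyBegin_MPISBAIJ(), the local A and B parts are assembled,
   disassembly is agreed upon collectively, and on the first final assembly
   MatSetUpMultiply_MPISBAIJ() builds the Mvctx/sMvctx scatters used by the
   multiply routines below. */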
879 
880 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
881 #include <petscdraw.h>
882 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
883 {
884   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
885   PetscInt          bs   = mat->rmap->bs;
886   PetscMPIInt       rank = baij->rank;
887   PetscBool         isascii, isdraw;
888   PetscViewer       sviewer;
889   PetscViewerFormat format;
890 
891   PetscFunctionBegin;
892   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
893   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
894   if (isascii) {
895     PetscCall(PetscViewerGetFormat(viewer, &format));
896     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
897       MatInfo info;
898       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
899       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
900       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
901       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
902                                                    mat->rmap->bs, info.memory));
903       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
904       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
905       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
906       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
907       PetscCall(PetscViewerFlush(viewer));
908       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
909       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
910       PetscCall(VecScatterView(baij->Mvctx, viewer));
911       PetscFunctionReturn(PETSC_SUCCESS);
912     } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
913   }
914 
915   if (isdraw) {
916     PetscDraw draw;
917     PetscBool isnull;
918     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
919     PetscCall(PetscDrawIsNull(draw, &isnull));
920     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
921   }
922 
923   {
924     /* assemble the entire matrix onto the first processor */
925     Mat           A;
926     Mat_SeqSBAIJ *Aloc;
927     Mat_SeqBAIJ  *Bloc;
928     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
929     MatScalar    *a;
930     const char   *matname;
931 
932     /* Should this be the same type as mat? */
933     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
934     if (rank == 0) {
935       PetscCall(MatSetSizes(A, M, N, M, N));
936     } else {
937       PetscCall(MatSetSizes(A, 0, 0, M, N));
938     }
939     PetscCall(MatSetType(A, MATMPISBAIJ));
940     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
941     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
942 
943     /* copy over the A part */
944     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
945     ai   = Aloc->i;
946     aj   = Aloc->j;
947     a    = Aloc->a;
948     PetscCall(PetscMalloc1(bs, &rvals));
949 
950     for (i = 0; i < mbs; i++) {
951       rvals[0] = bs * (baij->rstartbs + i);
952       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
953       for (j = ai[i]; j < ai[i + 1]; j++) {
954         col = (baij->cstartbs + aj[j]) * bs;
955         for (k = 0; k < bs; k++) {
956           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
957           col++;
958           a += bs;
959         }
960       }
961     }
962     /* copy over the B part */
963     Bloc = (Mat_SeqBAIJ *)baij->B->data;
964     ai   = Bloc->i;
965     aj   = Bloc->j;
966     a    = Bloc->a;
967     for (i = 0; i < mbs; i++) {
968       rvals[0] = bs * (baij->rstartbs + i);
969       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
970       for (j = ai[i]; j < ai[i + 1]; j++) {
971         col = baij->garray[aj[j]] * bs;
972         for (k = 0; k < bs; k++) {
973           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
974           col++;
975           a += bs;
976         }
977       }
978     }
979     PetscCall(PetscFree(rvals));
980     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
981     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
982     /*
983        Everyone has to participate in drawing the matrix since the graphics waits are
984        synchronized across all processors that share the PetscDraw object
985     */
986     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
987     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
988     if (rank == 0) {
989       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
990       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
991     }
992     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
993     PetscCall(MatDestroy(&A));
994   }
995   PetscFunctionReturn(PETSC_SUCCESS);
996 }
997 
998 /* Used for both MPIBAIJ and MPISBAIJ matrices */
999 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1000 
1001 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1002 {
1003   PetscBool isascii, isdraw, issocket, isbinary;
1004 
1005   PetscFunctionBegin;
1006   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1007   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1008   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1009   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1010   if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1011   else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1012   PetscFunctionReturn(PETSC_SUCCESS);
1013 }
1014 
1015 #if defined(PETSC_USE_COMPLEX)
1016 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1017 {
1018   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1019   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1020   PetscScalar       *from;
1021   const PetscScalar *x;
1022 
1023   PetscFunctionBegin;
1024   /* diagonal part */
1025   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1026   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1027   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1028   PetscCall(VecZeroEntries(a->slvec1b));
1029 
1030   /* subdiagonal part */
1031   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1032   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1033 
1034   /* copy x into the vec slvec0 */
1035   PetscCall(VecGetArray(a->slvec0, &from));
1036   PetscCall(VecGetArrayRead(xx, &x));
1037 
1038   PetscCall(PetscArraycpy(from, x, bs * mbs));
1039   PetscCall(VecRestoreArray(a->slvec0, &from));
1040   PetscCall(VecRestoreArrayRead(xx, &x));
1041 
1042   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1043   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1044   /* superdiagonal part */
1045   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 #endif
1049 
1050 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1051 {
1052   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1053   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1054   PetscScalar       *from;
1055   const PetscScalar *x;
1056 
1057   PetscFunctionBegin;
1058   /* diagonal part */
1059   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1060   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1061   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1062   PetscCall(VecZeroEntries(a->slvec1b));
1063 
1064   /* subdiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1066 
1067   /* copy x into the vec slvec0 */
1068   PetscCall(VecGetArray(a->slvec0, &from));
1069   PetscCall(VecGetArrayRead(xx, &x));
1070 
1071   PetscCall(PetscArraycpy(from, x, bs * mbs));
1072   PetscCall(VecRestoreArray(a->slvec0, &from));
1073   PetscCall(VecRestoreArrayRead(xx, &x));
1074 
1075   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1076   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1077   /* superdiagonal part */
1078   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1079   PetscFunctionReturn(PETSC_SUCCESS);
1080 }
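/* How the symmetric multiply above works (added commentary): slvec0 packs
   [x_local; B^T x_local]. The single sMvctx scatter then both (i) delivers
   the needed ghost values of x into slvec1b and (ii) adds each B^T x_local
   contribution into its owner's slvec1a, which already holds A*x_local. The
   final multadd y = B*slvec1b + slvec1a supplies the superdiagonal part, so
   one scatter yields the full product of the implicitly symmetric matrix. */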
1081 
1082 #if PetscDefined(USE_COMPLEX)
1083 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1084 {
1085   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1086   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1087   PetscScalar       *from;
1088   const PetscScalar *x;
1089 
1090   PetscFunctionBegin;
1091   /* diagonal part */
1092   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1093   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1094   PetscCall(VecZeroEntries(a->slvec1b));
1095 
1096   /* subdiagonal part */
1097   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1098   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1099 
1100   /* copy x into the vec slvec0 */
1101   PetscCall(VecGetArray(a->slvec0, &from));
1102   PetscCall(VecGetArrayRead(xx, &x));
1103   PetscCall(PetscArraycpy(from, x, bs * mbs));
1104   PetscCall(VecRestoreArray(a->slvec0, &from));
1105 
1106   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1107   PetscCall(VecRestoreArrayRead(xx, &x));
1108   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1109 
1110   /* superdiagonal part */
1111   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1112   PetscFunctionReturn(PETSC_SUCCESS);
1113 }
1114 #endif
1115 
1116 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1117 {
1118   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1119   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1120   PetscScalar       *from;
1121   const PetscScalar *x;
1122 
1123   PetscFunctionBegin;
1124   /* diagonal part */
1125   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1126   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1127   PetscCall(VecZeroEntries(a->slvec1b));
1128 
1129   /* subdiagonal part */
1130   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1131 
1132   /* copy x into the vec slvec0 */
1133   PetscCall(VecGetArray(a->slvec0, &from));
1134   PetscCall(VecGetArrayRead(xx, &x));
1135   PetscCall(PetscArraycpy(from, x, bs * mbs));
1136   PetscCall(VecRestoreArray(a->slvec0, &from));
1137 
1138   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1139   PetscCall(VecRestoreArrayRead(xx, &x));
1140   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1141 
1142   /* superdiagonal part */
1143   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 /*
1148   This only works correctly for square matrices where the subblock a->A is
1149    the diagonal block
1150 */
1151 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1152 {
1153   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1154 
1155   PetscFunctionBegin;
1156   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1157   PetscCall(MatGetDiagonal(a->A, v));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1162 {
1163   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCall(MatScale(a->A, aa));
1167   PetscCall(MatScale(a->B, aa));
1168   PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170 
1171 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1172 {
1173   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1174   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1175   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1176   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1177   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1178 
1179   PetscFunctionBegin;
1180   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1181   mat->getrowactive = PETSC_TRUE;
1182 
1183   if (!mat->rowvalues && (idx || v)) {
1184     /*
1185         allocate enough space to hold information from the longest row.
1186     */
1187     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1188     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1189     PetscInt      max = 1, mbs = mat->mbs, tmp;
1190     for (i = 0; i < mbs; i++) {
1191       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1192       if (max < tmp) max = tmp;
1193     }
1194     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1195   }
1196 
1197   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1198   lrow = row - brstart; /* local row index */
1199 
1200   pvA = &vworkA;
1201   pcA = &cworkA;
1202   pvB = &vworkB;
1203   pcB = &cworkB;
1204   if (!v) {
1205     pvA = NULL;
1206     pvB = NULL;
1207   }
1208   if (!idx) {
1209     pcA = NULL;
1210     if (!v) pcB = NULL;
1211   }
1212   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1213   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1214   nztot = nzA + nzB;
1215 
1216   cmap = mat->garray;
1217   if (v || idx) {
1218     if (nztot) {
1219       /* Sort by increasing column numbers, assuming A and B already sorted */
1220       PetscInt imark = -1;
1221       if (v) {
1222         *v = v_p = mat->rowvalues;
1223         for (i = 0; i < nzB; i++) {
1224           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1225           else break;
1226         }
1227         imark = i;
1228         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1229         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1230       }
1231       if (idx) {
1232         *idx = idx_p = mat->rowindices;
1233         if (imark > -1) {
1234           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1235         } else {
1236           for (i = 0; i < nzB; i++) {
1237             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1238             else break;
1239           }
1240           imark = i;
1241         }
1242         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1243         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1244       }
1245     } else {
1246       if (idx) *idx = NULL;
1247       if (v) *v = NULL;
1248     }
1249   }
1250   *nz = nztot;
1251   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1252   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
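/* Note on the merge above (added commentary): B entries whose global block
   column precedes the local diagonal block come first, then the A
   (diagonal-block) part, then the remaining B entries; since both row
   fragments are already sorted, the returned row is sorted by global
   column. */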
1255 
1256 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1257 {
1258   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1259 
1260   PetscFunctionBegin;
1261   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1262   baij->getrowactive = PETSC_FALSE;
1263   PetscFunctionReturn(PETSC_SUCCESS);
1264 }
1265 
1266 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1267 {
1268   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1269   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1270 
1271   PetscFunctionBegin;
1272   aA->getrow_utriangular = PETSC_TRUE;
1273   PetscFunctionReturn(PETSC_SUCCESS);
1274 }
1275 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1276 {
1277   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279 
1280   PetscFunctionBegin;
1281   aA->getrow_utriangular = PETSC_FALSE;
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 
1285 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1286 {
1287   PetscFunctionBegin;
1288   if (PetscDefined(USE_COMPLEX)) {
1289     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1290 
1291     PetscCall(MatConjugate(a->A));
1292     PetscCall(MatConjugate(a->B));
1293   }
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 
1297 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1298 {
1299   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1300 
1301   PetscFunctionBegin;
1302   PetscCall(MatRealPart(a->A));
1303   PetscCall(MatRealPart(a->B));
1304   PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306 
1307 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1308 {
1309   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1310 
1311   PetscFunctionBegin;
1312   PetscCall(MatImaginaryPart(a->A));
1313   PetscCall(MatImaginaryPart(a->B));
1314   PetscFunctionReturn(PETSC_SUCCESS);
1315 }
1316 
1317 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1318    Input: isrow       - distributed (parallel),
1319           iscol_local - locally owned (seq)
1320 */
1321 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1322 {
1323   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1324   const PetscInt *ptr1, *ptr2;
1325 
1326   PetscFunctionBegin;
1327   *flg = PETSC_FALSE;
1328   PetscCall(ISGetLocalSize(isrow, &sz1));
1329   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1330   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1331 
1332   PetscCall(ISGetIndices(isrow, &ptr1));
1333   PetscCall(ISGetIndices(iscol_local, &ptr2));
1334 
1335   PetscCall(PetscMalloc1(sz1, &a1));
1336   PetscCall(PetscMalloc1(sz2, &a2));
1337   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1338   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1339   PetscCall(PetscSortInt(sz1, a1));
1340   PetscCall(PetscSortInt(sz2, a2));
1341 
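  /* count how many entries of a1 also occur in a2; both arrays are sorted, so the inner
     scan can resume from the previous match position k */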
1342   nmatch = 0;
1343   k      = 0;
1344   for (i = 0; i < sz1; i++) {
1345     for (j = k; j < sz2; j++) {
1346       if (a1[i] == a2[j]) {
1347         k = j;
1348         nmatch++;
1349         break;
1350       }
1351     }
1352   }
1353   PetscCall(ISRestoreIndices(isrow, &ptr1));
1354   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1355   PetscCall(PetscFree(a1));
1356   PetscCall(PetscFree(a2));
1357   if (nmatch < sz1) {
1358     *flg = PETSC_FALSE;
1359   } else {
1360     *flg = PETSC_TRUE;
1361   }
1362   PetscFunctionReturn(PETSC_SUCCESS);
1363 }
1364 
1365 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1366 {
1367   Mat       C[2];
1368   IS        iscol_local, isrow_local;
1369   PetscInt  csize, csize_local, rsize;
1370   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(ISGetLocalSize(iscol, &csize));
1374   PetscCall(ISGetLocalSize(isrow, &rsize));
1375   if (call == MAT_REUSE_MATRIX) {
1376     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1377     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1378   } else {
1379     PetscCall(ISAllGather(iscol, &iscol_local));
1380     PetscCall(ISSorted(iscol_local, &issorted));
1381     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1382   }
1383   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1384   if (!isequal) {
1385     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1386     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1387     if (!isidentity) {
1388       if (call == MAT_REUSE_MATRIX) {
1389         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1390         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1391       } else {
1392         PetscCall(ISAllGather(isrow, &isrow_local));
1393         PetscCall(ISSorted(isrow_local, &issorted));
1394         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1395       }
1396     }
1397   }
1398   /* extract the submatrix with MatCreateSubMatrix_MPIBAIJ_Private(); since only the upper triangle is stored,
1399      an off-diagonal submatrix (isrow != iscol) must be completed below by adding the transpose of sub(iscol,isrow) */
1399   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1400   if (!isequal && !isidentity) {
1401     if (call == MAT_INITIAL_MATRIX) {
1402       IS       intersect;
1403       PetscInt ni;
1404 
1405       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1406       PetscCall(ISGetLocalSize(intersect, &ni));
1407       PetscCall(ISDestroy(&intersect));
1408       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1409     }
1410     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1411     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1412     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1413     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1414     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1415     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1416     PetscCall(MatDestroy(C));
1417     PetscCall(MatDestroy(C + 1));
1418   }
1419   if (call == MAT_INITIAL_MATRIX) {
1420     if (!isequal && !isidentity) {
1421       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1422       PetscCall(ISDestroy(&isrow_local));
1423     }
1424     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1425     PetscCall(ISDestroy(&iscol_local));
1426   }
1427   PetscFunctionReturn(PETSC_SUCCESS);
1428 }
1429 
1430 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1433 
1434   PetscFunctionBegin;
1435   PetscCall(MatZeroEntries(l->A));
1436   PetscCall(MatZeroEntries(l->B));
1437   PetscFunctionReturn(PETSC_SUCCESS);
1438 }
1439 
1440 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1441 {
1442   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1443   Mat            A = a->A, B = a->B;
1444   PetscLogDouble isend[5], irecv[5];
1445 
1446   PetscFunctionBegin;
1447   info->block_size = (PetscReal)matin->rmap->bs;
1448 
1449   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1450 
1451   isend[0] = info->nz_used;
1452   isend[1] = info->nz_allocated;
1453   isend[2] = info->nz_unneeded;
1454   isend[3] = info->memory;
1455   isend[4] = info->mallocs;
1456 
1457   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1458 
1459   isend[0] += info->nz_used;
1460   isend[1] += info->nz_allocated;
1461   isend[2] += info->nz_unneeded;
1462   isend[3] += info->memory;
1463   isend[4] += info->mallocs;
1464   if (flag == MAT_LOCAL) {
1465     info->nz_used      = isend[0];
1466     info->nz_allocated = isend[1];
1467     info->nz_unneeded  = isend[2];
1468     info->memory       = isend[3];
1469     info->mallocs      = isend[4];
1470   } else if (flag == MAT_GLOBAL_MAX) {
1471     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1472 
1473     info->nz_used      = irecv[0];
1474     info->nz_allocated = irecv[1];
1475     info->nz_unneeded  = irecv[2];
1476     info->memory       = irecv[3];
1477     info->mallocs      = irecv[4];
1478   } else if (flag == MAT_GLOBAL_SUM) {
1479     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1480 
1481     info->nz_used      = irecv[0];
1482     info->nz_allocated = irecv[1];
1483     info->nz_unneeded  = irecv[2];
1484     info->memory       = irecv[3];
1485     info->mallocs      = irecv[4];
1486   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1487   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1488   info->fill_ratio_needed = 0;
1489   info->factor_mallocs    = 0;
1490   PetscFunctionReturn(PETSC_SUCCESS);
1491 }
1492 
1493 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1494 {
1495   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1496   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1497 
1498   PetscFunctionBegin;
1499   switch (op) {
1500   case MAT_NEW_NONZERO_LOCATIONS:
1501   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1502   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1503   case MAT_KEEP_NONZERO_PATTERN:
1504   case MAT_NEW_NONZERO_LOCATION_ERR:
1505     MatCheckPreallocated(A, 1);
1506     PetscCall(MatSetOption(a->A, op, flg));
1507     PetscCall(MatSetOption(a->B, op, flg));
1508     break;
1509   case MAT_ROW_ORIENTED:
1510     MatCheckPreallocated(A, 1);
1511     a->roworiented = flg;
1512 
1513     PetscCall(MatSetOption(a->A, op, flg));
1514     PetscCall(MatSetOption(a->B, op, flg));
1515     break;
1516   case MAT_IGNORE_OFF_PROC_ENTRIES:
1517     a->donotstash = flg;
1518     break;
1519   case MAT_USE_HASH_TABLE:
1520     a->ht_flag = flg;
1521     break;
1522   case MAT_HERMITIAN:
1523     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1524 #if defined(PETSC_USE_COMPLEX)
1525     if (flg) { /* need different mat-vec ops */
1526       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1527       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1528       A->ops->multtranspose    = NULL;
1529       A->ops->multtransposeadd = NULL;
1530     }
1531 #endif
1532     break;
1533   case MAT_SPD:
1534   case MAT_SYMMETRIC:
1535     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1536 #if defined(PETSC_USE_COMPLEX)
1537     if (flg) { /* restore to use default mat-vec ops */
1538       A->ops->mult             = MatMult_MPISBAIJ;
1539       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1540       A->ops->multtranspose    = MatMult_MPISBAIJ;
1541       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1542     }
1543 #endif
1544     break;
1545   case MAT_STRUCTURALLY_SYMMETRIC:
1546     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1547     break;
1548   case MAT_IGNORE_LOWER_TRIANGULAR:
1549   case MAT_ERROR_LOWER_TRIANGULAR:
1550     aA->ignore_ltriangular = flg;
1551     break;
1552   case MAT_GETROW_UPPERTRIANGULAR:
1553     aA->getrow_utriangular = flg;
1554     break;
1555   default:
1556     break;
1557   }
1558   PetscFunctionReturn(PETSC_SUCCESS);
1559 }
1560 
1561 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1562 {
1563   PetscFunctionBegin;
1564   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1565   if (reuse == MAT_INITIAL_MATRIX) {
1566     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1567   } else if (reuse == MAT_REUSE_MATRIX) {
1568     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1569   }
1570   PetscFunctionReturn(PETSC_SUCCESS);
1571 }
1572 
1573 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1574 {
1575   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1576   Mat           a = baij->A, b = baij->B;
1577   PetscInt      nv, m, n;
1578 
1579   PetscFunctionBegin;
1580   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1581 
1582   PetscCall(MatGetLocalSize(mat, &m, &n));
1583   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1584 
1585   PetscCall(VecGetLocalSize(rr, &nv));
1586   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vectors have non-conforming local sizes");
1587 
1588   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1589 
1590   /* left diagonalscale the off-diagonal part */
1591   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1592 
1593   /* scale the diagonal part */
1594   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1595 
1596   /* right diagonalscale the off-diagonal part */
1597   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1598   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1603 {
1604   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1605 
1606   PetscFunctionBegin;
1607   PetscCall(MatSetUnfactored(a->A));
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1612 
1613 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1614 {
1615   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1616   Mat           a, b, c, d;
1617   PetscBool     flg;
1618 
1619   PetscFunctionBegin;
1620   a = matA->A;
1621   b = matA->B;
1622   c = matB->A;
1623   d = matB->B;
1624 
1625   PetscCall(MatEqual(a, c, &flg));
1626   if (flg) PetscCall(MatEqual(b, d, &flg));
1627   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1628   PetscFunctionReturn(PETSC_SUCCESS);
1629 }
1630 
1631 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1632 {
1633   PetscBool isbaij;
1634 
1635   PetscFunctionBegin;
1636   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1637   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1638   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1639   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1640     PetscCall(MatGetRowUpperTriangular(A));
1641     PetscCall(MatCopy_Basic(A, B, str));
1642     PetscCall(MatRestoreRowUpperTriangular(A));
1643   } else {
1644     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1645     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1646 
1647     PetscCall(MatCopy(a->A, b->A, str));
1648     PetscCall(MatCopy(a->B, b->B, str));
1649   }
1650   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1651   PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653 
1654 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1655 {
1656   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1657   PetscBLASInt  bnz, one                          = 1;
1658   Mat_SeqSBAIJ *xa, *ya;
1659   Mat_SeqBAIJ  *xb, *yb;
1660 
1661   PetscFunctionBegin;
1662   if (str == SAME_NONZERO_PATTERN) {
1663     PetscScalar alpha = a;
1664     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1665     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1666     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1667     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1668     xb = (Mat_SeqBAIJ *)xx->B->data;
1669     yb = (Mat_SeqBAIJ *)yy->B->data;
1670     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1671     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1672     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1673   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1674     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1675     PetscCall(MatAXPY_Basic(Y, a, X, str));
1676     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1677   } else {
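    /* different nonzero patterns: preallocate a fresh matrix that can hold the union of both
       patterns, form Y + a*X in it, then replace Y's data with it */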
1678     Mat       B;
1679     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1680     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1681     PetscCall(MatGetRowUpperTriangular(X));
1682     PetscCall(MatGetRowUpperTriangular(Y));
1683     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1684     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1685     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1686     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1687     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1688     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1689     PetscCall(MatSetType(B, MATMPISBAIJ));
1690     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1691     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1692     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1693     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1694     PetscCall(MatHeaderMerge(Y, &B));
1695     PetscCall(PetscFree(nnz_d));
1696     PetscCall(PetscFree(nnz_o));
1697     PetscCall(MatRestoreRowUpperTriangular(X));
1698     PetscCall(MatRestoreRowUpperTriangular(Y));
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1704 {
1705   PetscInt  i;
1706   PetscBool flg;
1707 
1708   PetscFunctionBegin;
1709   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1710   for (i = 0; i < n; i++) {
1711     PetscCall(ISEqual(irow[i], icol[i], &flg));
1712     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1713   }
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1718 {
1719   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1720   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1721 
1722   PetscFunctionBegin;
1723   if (!Y->preallocated) {
1724     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1725   } else if (!aij->nz) {
1726     PetscInt nonew = aij->nonew;
1727     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1728     aij->nonew = nonew;
1729   }
1730   PetscCall(MatShift_Basic(Y, a));
1731   PetscFunctionReturn(PETSC_SUCCESS);
1732 }
1733 
1734 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1735 {
1736   PetscFunctionBegin;
1737   *a = ((Mat_MPISBAIJ *)A->data)->A;
1738   PetscFunctionReturn(PETSC_SUCCESS);
1739 }
1740 
1741 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1742 {
1743   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1747   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1748   PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750 
1751 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1752 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1753 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1754 
1755 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1756                                        MatGetRow_MPISBAIJ,
1757                                        MatRestoreRow_MPISBAIJ,
1758                                        MatMult_MPISBAIJ,
1759                                        /*  4*/ MatMultAdd_MPISBAIJ,
1760                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1761                                        MatMultAdd_MPISBAIJ,
1762                                        NULL,
1763                                        NULL,
1764                                        NULL,
1765                                        /* 10*/ NULL,
1766                                        NULL,
1767                                        NULL,
1768                                        MatSOR_MPISBAIJ,
1769                                        MatTranspose_MPISBAIJ,
1770                                        /* 15*/ MatGetInfo_MPISBAIJ,
1771                                        MatEqual_MPISBAIJ,
1772                                        MatGetDiagonal_MPISBAIJ,
1773                                        MatDiagonalScale_MPISBAIJ,
1774                                        MatNorm_MPISBAIJ,
1775                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1776                                        MatAssemblyEnd_MPISBAIJ,
1777                                        MatSetOption_MPISBAIJ,
1778                                        MatZeroEntries_MPISBAIJ,
1779                                        /* 24*/ NULL,
1780                                        NULL,
1781                                        NULL,
1782                                        NULL,
1783                                        NULL,
1784                                        /* 29*/ MatSetUp_MPI_Hash,
1785                                        NULL,
1786                                        NULL,
1787                                        MatGetDiagonalBlock_MPISBAIJ,
1788                                        NULL,
1789                                        /* 34*/ MatDuplicate_MPISBAIJ,
1790                                        NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        /* 39*/ MatAXPY_MPISBAIJ,
1795                                        MatCreateSubMatrices_MPISBAIJ,
1796                                        MatIncreaseOverlap_MPISBAIJ,
1797                                        MatGetValues_MPISBAIJ,
1798                                        MatCopy_MPISBAIJ,
1799                                        /* 44*/ NULL,
1800                                        MatScale_MPISBAIJ,
1801                                        MatShift_MPISBAIJ,
1802                                        NULL,
1803                                        NULL,
1804                                        /* 49*/ NULL,
1805                                        NULL,
1806                                        NULL,
1807                                        NULL,
1808                                        NULL,
1809                                        /* 54*/ NULL,
1810                                        NULL,
1811                                        MatSetUnfactored_MPISBAIJ,
1812                                        NULL,
1813                                        MatSetValuesBlocked_MPISBAIJ,
1814                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                        NULL,
1819                                        /* 64*/ NULL,
1820                                        NULL,
1821                                        NULL,
1822                                        NULL,
1823                                        MatGetRowMaxAbs_MPISBAIJ,
1824                                        /* 69*/ NULL,
1825                                        MatConvert_MPISBAIJ_Basic,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        MatLoad_MPISBAIJ,
1834                                        /* 79*/ NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        /* 84*/ NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        /* 89*/ NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        MatConjugate_MPISBAIJ,
1849                                        /* 94*/ NULL,
1850                                        NULL,
1851                                        MatRealPart_MPISBAIJ,
1852                                        MatImaginaryPart_MPISBAIJ,
1853                                        MatGetRowUpperTriangular_MPISBAIJ,
1854                                        /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        /*104*/ NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        /*109*/ NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        /*114*/ NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        /*119*/ NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        /*124*/ NULL,
1880                                        NULL,
1881                                        MatSetBlockSizes_Default,
1882                                        NULL,
1883                                        NULL,
1884                                        /*129*/ NULL,
1885                                        MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        /*134*/ NULL,
1890                                        NULL,
1891                                        MatEliminateZeros_MPISBAIJ,
1892                                        NULL,
1893                                        NULL,
1894                                        /*139*/ NULL,
1895                                        NULL,
1896                                        MatCopyHashToXAIJ_MPI_Hash,
1897                                        NULL,
1898                                        NULL};
1899 
1900 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1901 {
1902   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1903   PetscInt      i, mbs, Mbs;
1904   PetscMPIInt   size;
1905 
1906   PetscFunctionBegin;
1907   if (B->hash_active) {
1908     B->ops[0]      = b->cops;
1909     B->hash_active = PETSC_FALSE;
1910   }
1911   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1912   PetscCall(MatSetBlockSize(B, bs));
1913   PetscCall(PetscLayoutSetUp(B->rmap));
1914   PetscCall(PetscLayoutSetUp(B->cmap));
1915   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1916   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1917   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1918 
1919   mbs = B->rmap->n / bs;
1920   Mbs = B->rmap->N / bs;
1921   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1922 
1923   B->rmap->bs = bs;
1924   b->bs2      = bs * bs;
1925   b->mbs      = mbs;
1926   b->Mbs      = Mbs;
1927   b->nbs      = B->cmap->n / bs;
1928   b->Nbs      = B->cmap->N / bs;
1929 
1930   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1931   b->rstartbs = B->rmap->rstart / bs;
1932   b->rendbs   = B->rmap->rend / bs;
1933 
1934   b->cstartbs = B->cmap->rstart / bs;
1935   b->cendbs   = B->cmap->rend / bs;
1936 
1937 #if defined(PETSC_USE_CTABLE)
1938   PetscCall(PetscHMapIDestroy(&b->colmap));
1939 #else
1940   PetscCall(PetscFree(b->colmap));
1941 #endif
1942   PetscCall(PetscFree(b->garray));
1943   PetscCall(VecDestroy(&b->lvec));
1944   PetscCall(VecScatterDestroy(&b->Mvctx));
1945   PetscCall(VecDestroy(&b->slvec0));
1946   PetscCall(VecDestroy(&b->slvec0b));
1947   PetscCall(VecDestroy(&b->slvec1));
1948   PetscCall(VecDestroy(&b->slvec1a));
1949   PetscCall(VecDestroy(&b->slvec1b));
1950   PetscCall(VecScatterDestroy(&b->sMvctx));
1951 
1952   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1953 
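  /* the off-diagonal block b->B is created with the full global column width; assembly later
     compacts its column space to just the columns referenced, recorded in b->garray */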
1954   MatSeqXAIJGetOptions_Private(b->B);
1955   PetscCall(MatDestroy(&b->B));
1956   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1957   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1958   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1959   MatSeqXAIJRestoreOptions_Private(b->B);
1960 
1961   MatSeqXAIJGetOptions_Private(b->A);
1962   PetscCall(MatDestroy(&b->A));
1963   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1964   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1965   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1966   MatSeqXAIJRestoreOptions_Private(b->A);
1967 
1968   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1969   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1970 
1971   B->preallocated  = PETSC_TRUE;
1972   B->was_assembled = PETSC_FALSE;
1973   B->assembled     = PETSC_FALSE;
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1978 {
1979   PetscInt        m, rstart, cend;
1980   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1981   const PetscInt *JJ          = NULL;
1982   PetscScalar    *values      = NULL;
1983   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1984   PetscBool       nooffprocentries;
1985 
1986   PetscFunctionBegin;
1987   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1988   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1989   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1990   PetscCall(PetscLayoutSetUp(B->rmap));
1991   PetscCall(PetscLayoutSetUp(B->cmap));
1992   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1993   m      = B->rmap->n / bs;
1994   rstart = B->rmap->rstart / bs;
1995   cend   = B->cmap->rend / bs;
1996 
1997   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
1998   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
1999   for (i = 0; i < m; i++) {
2000     nz = ii[i + 1] - ii[i];
2001     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2002     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2003     JJ = jj + ii[i];
2004     bd = 0;
2005     for (j = 0; j < nz; j++) {
2006       if (*JJ >= i + rstart) break;
2007       JJ++;
2008       bd++;
2009     }
2010     d = 0;
2011     for (; j < nz; j++) {
2012       if (*JJ++ >= cend) break;
2013       d++;
2014     }
2015     d_nnz[i] = d;
2016     o_nnz[i] = nz - d - bd;
2017     nz       = nz - bd;
2018     nz_max   = PetscMax(nz_max, nz);
2019   }
2020   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2021   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2022   PetscCall(PetscFree2(d_nnz, o_nnz));
2023 
2024   values = (PetscScalar *)V;
2025   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2026   for (i = 0; i < m; i++) {
2027     PetscInt        row   = i + rstart;
2028     PetscInt        ncols = ii[i + 1] - ii[i];
2029     const PetscInt *icols = jj + ii[i];
2030     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2031       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2032       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2033     } else { /* block ordering does not match so we can only insert one block at a time. */
2034       PetscInt j;
2035       for (j = 0; j < ncols; j++) {
2036         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2037         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2038       }
2039     }
2040   }
2041 
2042   if (!V) PetscCall(PetscFree(values));
2043   nooffprocentries    = B->nooffprocentries;
2044   B->nooffprocentries = PETSC_TRUE;
2045   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2046   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2047   B->nooffprocentries = nooffprocentries;
2048 
2049   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2050   PetscFunctionReturn(PETSC_SUCCESS);
2051 }
2052 
2053 /*MC
2054    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2055    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2056    the matrix is stored.
2057 
2058    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2059    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2060 
2061    Options Database Key:
2062 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2063 
2064    Level: beginner
2065 
2066    Note:
2067      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2068      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
2069 
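   Example Usage:
   A minimal sketch, not taken from this file; the sizes and per-row block counts (`M`, `bs`, 5, 2) are
   illustrative placeholders:
.vb
   Mat      A;
   PetscInt M = 128, bs = 2; /* global size must be divisible by the block size */

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, bs, 5, NULL, 2, NULL));
   /* insert only blocks on or above the block diagonal, then assemble as usual */
.ve
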
2070 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2071 M*/
2072 
2073 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2074 {
2075   Mat_MPISBAIJ *b;
2076   PetscBool     flg = PETSC_FALSE;
2077 
2078   PetscFunctionBegin;
2079   PetscCall(PetscNew(&b));
2080   B->data   = (void *)b;
2081   B->ops[0] = MatOps_Values;
2082 
2083   B->ops->destroy = MatDestroy_MPISBAIJ;
2084   B->ops->view    = MatView_MPISBAIJ;
2085   B->assembled    = PETSC_FALSE;
2086   B->insertmode   = NOT_SET_VALUES;
2087 
2088   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2089   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2090 
2091   /* build local table of row and column ownerships */
2092   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2093 
2094   /* build cache for off array entries formed */
2095   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2096 
2097   b->donotstash  = PETSC_FALSE;
2098   b->colmap      = NULL;
2099   b->garray      = NULL;
2100   b->roworiented = PETSC_TRUE;
2101 
2102   /* stuff used in block assembly */
2103   b->barray = NULL;
2104 
2105   /* stuff used for matrix vector multiply */
2106   b->lvec    = NULL;
2107   b->Mvctx   = NULL;
2108   b->slvec0  = NULL;
2109   b->slvec0b = NULL;
2110   b->slvec1  = NULL;
2111   b->slvec1a = NULL;
2112   b->slvec1b = NULL;
2113   b->sMvctx  = NULL;
2114 
2115   /* stuff for MatGetRow() */
2116   b->rowindices   = NULL;
2117   b->rowvalues    = NULL;
2118   b->getrowactive = PETSC_FALSE;
2119 
2120   /* hash table stuff */
2121   b->ht           = NULL;
2122   b->hd           = NULL;
2123   b->ht_size      = 0;
2124   b->ht_flag      = PETSC_FALSE;
2125   b->ht_fact      = 0;
2126   b->ht_total_ct  = 0;
2127   b->ht_insert_ct = 0;
2128 
2129   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2130   b->ijonly = PETSC_FALSE;
2131 
2132   b->in_loc = NULL;
2133   b->v_loc  = NULL;
2134   b->n_loc  = 0;
2135 
2136   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2137   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2138   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2139   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2140 #if defined(PETSC_HAVE_ELEMENTAL)
2141   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2142 #endif
2143 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2145 #endif
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2148 
2149   B->symmetric                   = PETSC_BOOL3_TRUE;
2150   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2151   B->symmetry_eternal            = PETSC_TRUE;
2152   B->structural_symmetry_eternal = PETSC_TRUE;
2153 #if !defined(PETSC_USE_COMPLEX)
2154   B->hermitian = PETSC_BOOL3_TRUE;
2155 #endif
2156 
2157   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2158   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for MPISBAIJ matrix", "Mat");
2159   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2160   if (flg) {
2161     PetscReal fact = 1.39;
2162     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2163     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2164     if (fact <= 1.0) fact = 1.39;
2165     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2166     PetscCall(PetscInfo(B, "Hash table factor used %5.2g\n", (double)fact));
2167   }
2168   PetscOptionsEnd();
2169   PetscFunctionReturn(PETSC_SUCCESS);
2170 }
2171 
2172 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2173 /*MC
2174    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2175 
2176    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2177    and `MATMPISBAIJ` otherwise.
2178 
2179    Options Database Key:
2180 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2181 
2182   Level: beginner
2183 
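   Example Usage:
   A minimal sketch, assuming `A` already has its sizes set:
.vb
   PetscCall(MatSetType(A, MATSBAIJ));  /* MATSEQSBAIJ on one process, MATMPISBAIJ otherwise */
   PetscCall(MatSetFromOptions(A));     /* or select the type at run time with -mat_type sbaij */
.ve
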
2184 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2185 M*/
2186 
2187 /*@
2188   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2189   the user should preallocate the matrix storage by setting the parameters
2190   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2191   performance can be increased by more than a factor of 50.
2192 
2193   Collective
2194 
2195   Input Parameters:
2196 + B     - the matrix
2197 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2198           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2199 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2200           submatrix  (same for all local rows)
2201 . d_nnz - array containing the number of block nonzeros in the various block rows
2202           in the upper triangular and diagonal part of the diagonal portion of the local
2203           submatrix (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2204           for the diagonal entry and set a value even if it is zero.
2205 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2206           submatrix (same for all local rows).
2207 - o_nnz - array containing the number of nonzeros in the various block rows of the
2208           off-diagonal portion of the local submatrix that is right of the diagonal
2209           (possibly different for each block row) or `NULL`.
2210 
2211   Options Database Keys:
2212 + -mat_no_unroll  - uses code that does not unroll the loops in the
2213                     block calculations (much slower)
2214 - -mat_block_size - size of the blocks to use
2215 
2216   Level: intermediate
2217 
2218   Notes:
2219 
2220   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2221   then it must be used on all processors that share the object for that argument.
2222 
2223   If the *_nnz parameter is given then the *_nz parameter is ignored
2224 
2225   Storage Information:
2226   For a square global matrix we define each processor's diagonal portion
2227   to be its local rows and the corresponding columns (a square submatrix);
2228   each processor's off-diagonal portion encompasses the remainder of the
2229   local matrix (a rectangular submatrix).
2230 
2231   The user can specify preallocated storage for the diagonal part of
2232   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2233   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2234   memory allocation.  Likewise, specify preallocated storage for the
2235   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2236 
2237   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2238   for example, the fields `mallocs`, `nz_allocated`, `nz_used`, and `nz_unneeded`.
2239   You can also run with the option `-info` and look for messages with the string
2240   malloc in them to see if additional memory allocation was needed.
2241 
2242   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2243   the figure below we depict these three local rows and all columns (0-11).
2244 
2245 .vb
2246            0 1 2 3 4 5 6 7 8 9 10 11
2247           --------------------------
2248    row 3  |. . . d d d o o o o  o  o
2249    row 4  |. . . d d d o o o o  o  o
2250    row 5  |. . . d d d o o o o  o  o
2251           --------------------------
2252 .ve
2253 
2254   Thus, any entries in the d locations are stored in the d (diagonal)
2255   submatrix, and any entries in the o locations are stored in the
2256   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2257   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2258 
2259   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2260   plus the diagonal part of the d matrix,
2261   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2262 
2263   In general, for PDE problems in which most nonzeros are near the diagonal,
2264   one expects `d_nz` >> `o_nz`.
2265 
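   Example Usage:
   A sketch for the layout depicted above with bs = 1: the process owning rows 3, 4, and 5 stores only the
   upper triangle of the d block (3, 2, and 1 entries for the successive rows) and all 6 o entries per row:
.vb
   PetscInt d_nnz[] = {3, 2, 1};
   PetscInt o_nnz[] = {6, 6, 6};

   PetscCall(MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz));
.ve
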
2266 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2267 @*/
2268 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2269 {
2270   PetscFunctionBegin;
2271   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2272   PetscValidType(B, 1);
2273   PetscValidLogicalCollectiveInt(B, bs, 2);
2274   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2275   PetscFunctionReturn(PETSC_SUCCESS);
2276 }
2277 
2278 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2279 /*@
2280   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2281   (block compressed row).  For good matrix assembly performance
2282   the user should preallocate the matrix storage by setting the parameters
2283   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2284 
2285   Collective
2286 
2287   Input Parameters:
2288 + comm  - MPI communicator
2289 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2290           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2291 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2292           This value should be the same as the local size used in creating the
2293           y vector for the matrix-vector product y = Ax.
2294 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2295           This value should be the same as the local size used in creating the
2296           x vector for the matrix-vector product y = Ax.
2297 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2298 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2299 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2300           submatrix (same for all local rows)
2301 . d_nnz - array containing the number of block nonzeros in the various block rows
2302           in the upper triangular portion of the diagonal portion of the local
2303           submatrix (possibly different for each block row) or `NULL`.
2304           If you plan to factor the matrix you must leave room for the diagonal entry and
2305           set its value even if it is zero.
2306 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2307           submatrix (same for all local rows).
2308 - o_nnz - array containing the number of nonzeros in the various block rows of the
2309           off-diagonal portion of the local submatrix (possibly different for
2310           each block row) or `NULL`.
2311 
2312   Output Parameter:
2313 . A - the matrix
2314 
2315   Options Database Keys:
2316 + -mat_no_unroll  - uses code that does not unroll the loops in the
2317                     block calculations (much slower)
2318 . -mat_block_size - size of the blocks to use
2319 - -mat_mpi        - use the parallel matrix data structures even on one processor
2320                     (defaults to using SeqBAIJ format on one processor)
2321 
2322   Level: intermediate
2323 
2324   Notes:
2325   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2326   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2327   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2328 
2329   The number of rows and columns must be divisible by blocksize.
2330   This matrix type does not support complex Hermitian operations.
2331 
2332   The user MUST specify either the local or global matrix dimensions
2333   (possibly both).
2334 
2335   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2336   then it must be used on all processors that share the object for that argument.
2337 
2338   If `m` and `n` are not `PETSC_DECIDE`, then these values determine the `PetscLayout` of the matrix and the ranges returned by
2339   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2340 
2341   If the *_nnz parameter is given then the *_nz parameter is ignored
2342 
2343   Storage Information:
2344   For a square global matrix we define each processor's diagonal portion
2345   to be its local rows and the corresponding columns (a square submatrix);
2346   each processor's off-diagonal portion encompasses the remainder of the
2347   local matrix (a rectangular submatrix).
2348 
2349   The user can specify preallocated storage for the diagonal part of
2350   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2351   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2352   memory allocation. Likewise, specify preallocated storage for the
2353   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2354 
2355   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2356   the figure below we depict these three local rows and all columns (0-11).
2357 
2358 .vb
2359            0 1 2 3 4 5 6 7 8 9 10 11
2360           --------------------------
2361    row 3  |. . . d d d o o o o  o  o
2362    row 4  |. . . d d d o o o o  o  o
2363    row 5  |. . . d d d o o o o  o  o
2364           --------------------------
2365 .ve
2366 
2367   Thus, any entries in the d locations are stored in the d (diagonal)
2368   submatrix, and any entries in the o locations are stored in the
2369   o (off-diagonal) submatrix. Note that the d matrix is stored in
2370   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2371 
2372   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2373   plus the diagonal part of the d matrix,
2374   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2375   In general, for PDE problems in which most nonzeros are near the diagonal,
2376   one expects `d_nz` >> `o_nz`.
2377 
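   Example Usage:
   A minimal sketch; `bs`, `M`, and the per-row block counts 5 and 2 are illustrative placeholders:
.vb
   Mat      A;
   PetscInt bs = 2, M = 100; /* M must be divisible by bs */

   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, bs, PETSC_DECIDE, PETSC_DECIDE, M, M, 5, NULL, 2, NULL, &A));
.ve
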
2378 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2379           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2380 @*/
2381 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2382 {
2383   PetscMPIInt size;
2384 
2385   PetscFunctionBegin;
2386   PetscCall(MatCreate(comm, A));
2387   PetscCall(MatSetSizes(*A, m, n, M, N));
2388   PetscCallMPI(MPI_Comm_size(comm, &size));
2389   if (size > 1) {
2390     PetscCall(MatSetType(*A, MATMPISBAIJ));
2391     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2392   } else {
2393     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2394     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2395   }
2396   PetscFunctionReturn(PETSC_SUCCESS);
2397 }
2398 
2399 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2400 {
2401   Mat           mat;
2402   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2403   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2404   PetscScalar  *array;
2405 
2406   PetscFunctionBegin;
2407   *newmat = NULL;
2408 
2409   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2410   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2411   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2412   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2413   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2414 
2415   if (matin->hash_active) {
2416     PetscCall(MatSetUp(mat));
2417   } else {
2418     mat->factortype   = matin->factortype;
2419     mat->preallocated = PETSC_TRUE;
2420     mat->assembled    = PETSC_TRUE;
2421     mat->insertmode   = NOT_SET_VALUES;
2422 
2423     a      = (Mat_MPISBAIJ *)mat->data;
2424     a->bs2 = oldmat->bs2;
2425     a->mbs = oldmat->mbs;
2426     a->nbs = oldmat->nbs;
2427     a->Mbs = oldmat->Mbs;
2428     a->Nbs = oldmat->Nbs;
2429 
2430     a->size         = oldmat->size;
2431     a->rank         = oldmat->rank;
2432     a->donotstash   = oldmat->donotstash;
2433     a->roworiented  = oldmat->roworiented;
2434     a->rowindices   = NULL;
2435     a->rowvalues    = NULL;
2436     a->getrowactive = PETSC_FALSE;
2437     a->barray       = NULL;
2438     a->rstartbs     = oldmat->rstartbs;
2439     a->rendbs       = oldmat->rendbs;
2440     a->cstartbs     = oldmat->cstartbs;
2441     a->cendbs       = oldmat->cendbs;
2442 
2443     /* hash table stuff */
2444     a->ht           = NULL;
2445     a->hd           = NULL;
2446     a->ht_size      = 0;
2447     a->ht_flag      = oldmat->ht_flag;
2448     a->ht_fact      = oldmat->ht_fact;
2449     a->ht_total_ct  = 0;
2450     a->ht_insert_ct = 0;
2451 
2452     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2453     if (oldmat->colmap) {
2454 #if defined(PETSC_USE_CTABLE)
2455       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2456 #else
2457       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2458       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2459 #endif
2460     } else a->colmap = NULL;
2461 
2462     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2463       PetscCall(PetscMalloc1(len, &a->garray));
2464       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2465     } else a->garray = NULL;
2466 
2467     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2468     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2469     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2470 
2471     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2472     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2473 
2474     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2475     PetscCall(VecGetArray(a->slvec1, &array));
2476     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2477     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2478     PetscCall(VecRestoreArray(a->slvec1, &array));
2479     PetscCall(VecGetArray(a->slvec0, &array));
2480     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2481     PetscCall(VecRestoreArray(a->slvec0, &array));
2482 
2483     /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead take a reference to the old scatter: */
2484     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2485     a->sMvctx = oldmat->sMvctx;
2486 
2487     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2488     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2489   }
2490   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2491   *newmat = mat;
2492   PetscFunctionReturn(PETSC_SUCCESS);
2493 }
2494 
2495 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2496 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2497 
2498 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2499 {
2500   PetscBool isbinary;
2501 
2502   PetscFunctionBegin;
2503   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2504   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2505   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2506   PetscFunctionReturn(PETSC_SUCCESS);
2507 }
2508 
2509 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2510 {
2511   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2512   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2513   PetscReal     atmp;
2514   PetscReal    *work, *svalues, *rvalues;
2515   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2516   PetscMPIInt   rank, size;
2517   PetscInt     *rowners_bs, count, source;
2518   PetscScalar  *va;
2519   MatScalar    *ba;
2520   MPI_Status    stat;
2521 
2522   PetscFunctionBegin;
2523   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2524   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2525   PetscCall(VecGetArray(v, &va));
2526 
2527   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2528   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2529 
2530   bs  = A->rmap->bs;
2531   mbs = a->mbs;
2532   Mbs = a->Mbs;
2533   ba  = b->a;
2534   bi  = b->i;
2535   bj  = b->j;
2536 
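  /* only the upper triangle is stored, so the entries of the off-diagonal block B contribute to rows
     owned by higher ranks; each rank accumulates those contributions in work[] and sends each owned
     slice to the corresponding higher rank below */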
2537   /* find ownerships */
2538   rowners_bs = A->rmap->range;
2539 
2540   /* each proc creates an array to be distributed */
2541   PetscCall(PetscCalloc1(bs * Mbs, &work));
2542 
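  /* Since only the upper triangle is stored, an entry of B in local row i with global column c owned by
     a higher-rank process is also the symmetric entry (c,i) of a row owned by that process. work[]
     therefore accumulates the running maxima per global column; the slices owned by higher ranks are
     sent to them below so they can fold these values into their own row maxima. */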
2543   /* row_max for B */
2544   if (rank != size - 1) {
2545     for (i = 0; i < mbs; i++) {
2546       ncols = bi[1] - bi[0];
2547       bi++;
2548       brow = bs * i;
2549       for (j = 0; j < ncols; j++) {
2550         bcol = bs * (*bj);
2551         for (kcol = 0; kcol < bs; kcol++) {
2552           col = bcol + kcol;           /* local col index */
2553           col += rowners_bs[rank + 1]; /* global col index */
2554           for (krow = 0; krow < bs; krow++) {
2555             atmp = PetscAbsScalar(*ba);
2556             ba++;
2557             row = brow + krow; /* local row index */
2558             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2559             if (work[col] < atmp) work[col] = atmp;
2560           }
2561         }
2562         bj++;
2563       }
2564     }
2565 
2566     /* send each slice of the accumulated column maxima to its owning process */
2567     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2568       svalues = work + rowners_bs[dest];
2569       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2570       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2571     }
2572   }
2573 
2574   /* receive values */
2575   if (rank) {
2576     rvalues = work;
2577     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2578     for (source = 0; source < rank; source++) {
2579       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2580       /* process values */
2581       for (i = 0; i < count; i++) {
2582         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2583       }
2584     }
2585   }
2586 
2587   PetscCall(VecRestoreArray(v, &va));
2588   PetscCall(PetscFree(work));
2589   PetscFunctionReturn(PETSC_SUCCESS);
2590 }
2591 
2592 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2593 {
2594   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2595   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2596   PetscScalar       *x, *ptr, *from;
2597   Vec                bb1;
2598   const PetscScalar *b;
2599 
2600   PetscFunctionBegin;
2601   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2602   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2603 
2604   if (flag == SOR_APPLY_UPPER) {
2605     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2606     PetscFunctionReturn(PETSC_SUCCESS);
2607   }
2608 
2609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2610     if (flag & SOR_ZERO_INITIAL_GUESS) {
2611       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2612       its--;
2613     }
2614 
2615     PetscCall(VecDuplicate(bb, &bb1));
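    /* Each pass assembles bb1 = bb - (off-diagonal part of the symmetric matrix)*xx with one forward
       scatter: slvec0 = -[xx; B^T*xx] is added into slvec1 = [bb; 0], after which multadd() applies B
       to the ghost part slvec1b. This accounts for both triangles of the stored upper-triangular data. */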
2616     while (its--) {
2617       /* lower triangular part: slvec0b = - B^T*xx */
2618       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2619 
2620       /* copy xx into slvec0a */
2621       PetscCall(VecGetArray(mat->slvec0, &ptr));
2622       PetscCall(VecGetArray(xx, &x));
2623       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2624       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2625 
2626       PetscCall(VecScale(mat->slvec0, -1.0));
2627 
2628       /* copy bb into slvec1a */
2629       PetscCall(VecGetArray(mat->slvec1, &ptr));
2630       PetscCall(VecGetArrayRead(bb, &b));
2631       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2632       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2633 
2634       /* set slvec1b = 0 */
2635       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2636       PetscCall(VecZeroEntries(mat->slvec1b));
2637 
2638       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2639       PetscCall(VecRestoreArray(xx, &x));
2640       PetscCall(VecRestoreArrayRead(bb, &b));
2641       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2642 
2643       /* upper triangular part: bb1 = bb1 - B*x */
2644       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2645 
2646       /* local diagonal sweep */
2647       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2648     }
2649     PetscCall(VecDestroy(&bb1));
2650   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2651     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2652   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2653     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2654   } else if (flag & SOR_EISENSTAT) {
2655     Vec                xx1;
2656     PetscBool          hasop;
2657     const PetscScalar *diag;
2658     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2659     PetscInt           i, n;
2660 
2661     if (!mat->xx1) {
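    /* Eisenstat's trick: apply the SSOR preconditioner using only a single multiply with the
       off-diagonal part of the matrix, combining a local backward sweep, a diagonal scaling of the
       result, and a local forward sweep. */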
2662       PetscCall(VecDuplicate(bb, &mat->xx1));
2663       PetscCall(VecDuplicate(bb, &mat->bb1));
2664     }
2665     xx1 = mat->xx1;
2666     bb1 = mat->bb1;
2667 
2668     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2669 
2670     if (!mat->diag) {
2671       /* note: this cached diagonal becomes stale if the same matrix is refilled with new numerical values */
2672       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2673       PetscCall(MatGetDiagonal(matin, mat->diag));
2674     }
2675     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2676 
2677     if (hasop) {
2678       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2679       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2680     } else {
2681       /*
2682           These two lines are replaced by code that may be a bit faster for a good compiler
2683       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2684       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2685       */
2686       PetscCall(VecGetArray(mat->slvec1a, &sl));
2687       PetscCall(VecGetArrayRead(mat->diag, &diag));
2688       PetscCall(VecGetArrayRead(bb, &b));
2689       PetscCall(VecGetArray(xx, &x));
2690       PetscCall(VecGetLocalSize(xx, &n));
2691       if (omega == 1.0) {
2692         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2693         PetscCall(PetscLogFlops(2.0 * n));
2694       } else {
2695         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2696         PetscCall(PetscLogFlops(3.0 * n));
2697       }
2698       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2699       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2700       PetscCall(VecRestoreArrayRead(bb, &b));
2701       PetscCall(VecRestoreArray(xx, &x));
2702     }
2703 
2704     /* multiply off-diagonal portion of matrix */
2705     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2706     PetscCall(VecZeroEntries(mat->slvec1b));
2707     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2708     PetscCall(VecGetArray(mat->slvec0, &from));
2709     PetscCall(VecGetArray(xx, &x));
2710     PetscCall(PetscArraycpy(from, x, bs * mbs));
2711     PetscCall(VecRestoreArray(mat->slvec0, &from));
2712     PetscCall(VecRestoreArray(xx, &x));
2713     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2714     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2715     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2716 
2717     /* local sweep */
2718     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2719     PetscCall(VecAXPY(xx, 1.0, xx1));
2720   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 /*@
2725   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2726 
2727   Collective
2728 
2729   Input Parameters:
2730 + comm - MPI communicator
2731 . bs   - the block size; only a block size of 1 is supported
2732 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2733 . n    - number of local columns; this value should be the same as the local size used in creating the
2734          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
2735          calculated if `N` is given). For square matrices `n` is almost always `m`.
2736 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2737 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2738 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row `row-1` of the matrix
2739 . j    - column indices
2740 - a    - matrix values
2741 
2742   Output Parameter:
2743 . mat - the matrix
2744 
2745   Level: intermediate
2746 
2747   Notes:
2748   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2749   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2750   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2751 
2752   The `i` and `j` indices are 0 based; each entry of `i` is an offset into the local `j` array.
2753 
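  Example:
  A minimal sketch on one MPI rank with `bs` = 1; the 2x2 upper-triangular CSR triple below is
  illustrative, not taken from a real application:
.vb
  PetscInt    i[] = {0, 2, 3};        /* row 0 has 2 entries, row 1 has 1 entry */
  PetscInt    j[] = {0, 1, 1};        /* column indices, upper triangle only */
  PetscScalar a[] = {4.0, -1.0, 4.0}; /* matrix values */
  Mat         A;

  PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve
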
2754 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2755           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2756 @*/
2757 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2758 {
2759   PetscFunctionBegin;
2760   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2761   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
2762   PetscCall(MatCreate(comm, mat));
2763   PetscCall(MatSetSizes(*mat, m, n, M, N));
2764   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2765   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2766   PetscFunctionReturn(PETSC_SUCCESS);
2767 }
2768 
2769 /*@
2770   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2771 
2772   Collective
2773 
2774   Input Parameters:
2775 + B  - the matrix
2776 . bs - the block size
2777 . i  - the indices into `j` for the start of each local row (indices start with zero)
2778 . j  - the column indices for each local row (indices start with zero); these must be sorted within each row
2779 - v  - optional values in the matrix, pass `NULL` if not provided
2780 
2781   Level: advanced
2782 
2783   Notes:
2784   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2785   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2786   called this routine.
2787 
2788   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2789   and usually the numerical values as well.
2790 
2791   Any entries passed in that are below the diagonal are ignored.
2792 
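  Example:
  A minimal sketch on one MPI rank; the one-row upper-triangular CSR triple below is hypothetical:
.vb
  Mat         B;
  PetscInt    i[] = {0, 1};
  PetscInt    j[] = {0};
  PetscScalar v[] = {2.0};

  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve
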
2793 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2794           `MatCreateMPISBAIJWithArrays()`
2795 @*/
2796 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2797 {
2798   PetscFunctionBegin;
2799   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2800   PetscFunctionReturn(PETSC_SUCCESS);
2801 }
2802 
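/* Builds a parallel SBAIJ matrix on comm by stacking each rank's sequential SBAIJ matrix inmat as that
   rank's block of local rows; n is the local column size of the result (or PETSC_DECIDE). */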
2803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2804 {
2805   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2806   PetscInt    *indx;
2807   PetscScalar *values;
2808 
2809   PetscFunctionBegin;
2810   PetscCall(MatGetSize(inmat, &m, &N));
2811   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2812     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2813     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2814     PetscInt     *bindx, rmax = a->rmax, j;
2815     PetscMPIInt   rank, size;
2816 
2817     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2818     mbs = m / bs;
2819     Nbs = N / cbs;
2820     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2821     nbs = n / cbs;
2822 
2823     PetscCall(PetscMalloc1(rmax, &bindx));
2824     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2825 
2826     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2827     PetscCallMPI(MPI_Comm_size(comm, &size));
2828     if (rank == size - 1) {
2829       /* Check sum(nbs) = Nbs */
2830       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2831     }
2832 
2833     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2834     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2835     for (i = 0; i < mbs; i++) {
2836       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2837       nnz = nnz / bs;
2838       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2839       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2840       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2841     }
2842     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2843     PetscCall(PetscFree(bindx));
2844 
2845     PetscCall(MatCreate(comm, outmat));
2846     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2847     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2848     PetscCall(MatSetType(*outmat, MATSBAIJ));
2849     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2850     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2851     MatPreallocateEnd(dnz, onz);
2852   }
2853 
2854   /* numeric phase */
2855   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2856   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2857 
2858   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2859   for (i = 0; i < m; i++) {
2860     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2861     Ii = i + rstart;
2862     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2863     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2864   }
2865   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2866   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2867   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2868   PetscFunctionReturn(PETSC_SUCCESS);
2869 }
2870