xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 65179781167c53baf4bbcd2ba89ddd86e7bb5b3d)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11 #if defined(PETSC_USE_LOG)
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 #endif
14   PetscCall(MatStashDestroy_Private(&mat->stash));
15   PetscCall(MatStashDestroy_Private(&mat->bstash));
16   PetscCall(MatDestroy(&baij->A));
17   PetscCall(MatDestroy(&baij->B));
18 #if defined(PETSC_USE_CTABLE)
19   PetscCall(PetscHMapIDestroy(&baij->colmap));
20 #else
21   PetscCall(PetscFree(baij->colmap));
22 #endif
23   PetscCall(PetscFree(baij->garray));
24   PetscCall(VecDestroy(&baij->lvec));
25   PetscCall(VecScatterDestroy(&baij->Mvctx));
26   PetscCall(VecDestroy(&baij->slvec0));
27   PetscCall(VecDestroy(&baij->slvec0b));
28   PetscCall(VecDestroy(&baij->slvec1));
29   PetscCall(VecDestroy(&baij->slvec1a));
30   PetscCall(VecDestroy(&baij->slvec1b));
31   PetscCall(VecScatterDestroy(&baij->sMvctx));
32   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33   PetscCall(PetscFree(baij->barray));
34   PetscCall(PetscFree(baij->hd));
35   PetscCall(VecDestroy(&baij->diag));
36   PetscCall(VecDestroy(&baij->bb1));
37   PetscCall(VecDestroy(&baij->xx1));
38 #if defined(PETSC_USE_REAL_MAT_SINGLE)
39   PetscCall(PetscFree(baij->setvaluescopy));
40 #endif
41   PetscCall(PetscFree(baij->in_loc));
42   PetscCall(PetscFree(baij->v_loc));
43   PetscCall(PetscFree(baij->rangebs));
44   PetscCall(PetscFree(mat->data));
45 
46   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51 #if defined(PETSC_HAVE_ELEMENTAL)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53 #endif
54 #if defined(PETSC_HAVE_SCALAPACK)
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56 #endif
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63 #define TYPE SBAIJ
64 #define TYPE_SBAIJ
65 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66 #undef TYPE
67 #undef TYPE_SBAIJ
68 
69 #if defined(PETSC_HAVE_ELEMENTAL)
70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71 #endif
72 #if defined(PETSC_HAVE_SCALAPACK)
73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74 #endif
75 
76 /* This could be moved to matimpl.h */
77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78 {
79   Mat       preallocator;
80   PetscInt  r, rstart, rend;
81   PetscInt  bs, i, m, n, M, N;
82   PetscBool cong = PETSC_TRUE;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86   PetscValidLogicalCollectiveInt(B, nm, 2);
87   for (i = 0; i < nm; i++) {
88     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
89     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91   }
92   PetscValidLogicalCollectiveBool(B, fill, 5);
93   PetscCall(MatGetBlockSize(B, &bs));
94   PetscCall(MatGetSize(B, &M, &N));
95   PetscCall(MatGetLocalSize(B, &m, &n));
96   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98   PetscCall(MatSetBlockSize(preallocator, bs));
99   PetscCall(MatSetSizes(preallocator, m, n, M, N));
100   PetscCall(MatSetUp(preallocator));
101   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102   for (r = rstart; r < rend; ++r) {
103     PetscInt           ncols;
104     const PetscInt    *row;
105     const PetscScalar *vals;
106 
107     for (i = 0; i < nm; i++) {
108       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
111       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112     }
113   }
114   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
117   PetscCall(MatDestroy(&preallocator));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122 {
123   Mat      B;
124   PetscInt r;
125 
126   PetscFunctionBegin;
127   if (reuse != MAT_REUSE_MATRIX) {
128     PetscBool symm = PETSC_TRUE, isdense;
129     PetscInt  bs;
130 
131     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133     PetscCall(MatSetType(B, newtype));
134     PetscCall(MatGetBlockSize(A, &bs));
135     PetscCall(MatSetBlockSize(B, bs));
136     PetscCall(PetscLayoutSetUp(B->rmap));
137     PetscCall(PetscLayoutSetUp(B->cmap));
138     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139     if (!isdense) {
140       PetscCall(MatGetRowUpperTriangular(A));
141       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142       PetscCall(MatRestoreRowUpperTriangular(A));
143     } else {
144       PetscCall(MatSetUp(B));
145     }
146   } else {
147     B = *newmat;
148     PetscCall(MatZeroEntries(B));
149   }
150 
151   PetscCall(MatGetRowUpperTriangular(A));
152   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153     PetscInt           ncols;
154     const PetscInt    *row;
155     const PetscScalar *vals;
156 
157     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159 #if defined(PETSC_USE_COMPLEX)
160     if (A->hermitian == PETSC_BOOL3_TRUE) {
161       PetscInt i;
162       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163     } else {
164       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165     }
166 #else
167     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168 #endif
169     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170   }
171   PetscCall(MatRestoreRowUpperTriangular(A));
172   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174 
175   if (reuse == MAT_INPLACE_MATRIX) {
176     PetscCall(MatHeaderReplace(A, &B));
177   } else {
178     *newmat = B;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184 {
185   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186 
187   PetscFunctionBegin;
188   PetscCall(MatStoreValues(aij->A));
189   PetscCall(MatStoreValues(aij->B));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194 {
195   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196 
197   PetscFunctionBegin;
198   PetscCall(MatRetrieveValues(aij->A));
199   PetscCall(MatRetrieveValues(aij->B));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204   { \
205     brow = row / bs; \
206     rp   = aj + ai[brow]; \
207     ap   = aa + bs2 * ai[brow]; \
208     rmax = aimax[brow]; \
209     nrow = ailen[brow]; \
210     bcol = col / bs; \
211     ridx = row % bs; \
212     cidx = col % bs; \
213     low  = 0; \
214     high = nrow; \
215     while (high - low > 3) { \
216       t = (low + high) / 2; \
217       if (rp[t] > bcol) high = t; \
218       else low = t; \
219     } \
220     for (_i = low; _i < high; _i++) { \
221       if (rp[_i] > bcol) break; \
222       if (rp[_i] == bcol) { \
223         bap = ap + bs2 * _i + bs * cidx + ridx; \
224         if (addv == ADD_VALUES) *bap += value; \
225         else *bap = value; \
226         goto a_noinsert; \
227       } \
228     } \
229     if (a->nonew == 1) goto a_noinsert; \
230     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232     N = nrow++ - 1; \
233     /* shift up all the later entries in this row */ \
234     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237     rp[_i]                          = bcol; \
238     ap[bs2 * _i + bs * cidx + ridx] = value; \
239     A->nonzerostate++; \
240   a_noinsert:; \
241     ailen[brow] = nrow; \
242   }
243 
244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245   { \
246     brow = row / bs; \
247     rp   = bj + bi[brow]; \
248     ap   = ba + bs2 * bi[brow]; \
249     rmax = bimax[brow]; \
250     nrow = bilen[brow]; \
251     bcol = col / bs; \
252     ridx = row % bs; \
253     cidx = col % bs; \
254     low  = 0; \
255     high = nrow; \
256     while (high - low > 3) { \
257       t = (low + high) / 2; \
258       if (rp[t] > bcol) high = t; \
259       else low = t; \
260     } \
261     for (_i = low; _i < high; _i++) { \
262       if (rp[_i] > bcol) break; \
263       if (rp[_i] == bcol) { \
264         bap = ap + bs2 * _i + bs * cidx + ridx; \
265         if (addv == ADD_VALUES) *bap += value; \
266         else *bap = value; \
267         goto b_noinsert; \
268       } \
269     } \
270     if (b->nonew == 1) goto b_noinsert; \
271     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273     N = nrow++ - 1; \
274     /* shift up all the later entries in this row */ \
275     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278     rp[_i]                          = bcol; \
279     ap[bs2 * _i + bs * cidx + ridx] = value; \
280     B->nonzerostate++; \
281   b_noinsert:; \
282     bilen[brow] = nrow; \
283   }
284 
285 /* Only add/insert a(i,j) with i<=j (blocks).
286    Any a(i,j) with i>j input by user is ignored or generates an error
287 */
288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289 {
290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291   MatScalar     value;
292   PetscBool     roworiented = baij->roworiented;
293   PetscInt      i, j, row, col;
294   PetscInt      rstart_orig = mat->rmap->rstart;
295   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297 
298   /* Some Variables required in the macro */
299   Mat           A     = baij->A;
300   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
301   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302   MatScalar    *aa = a->a;
303 
304   Mat          B     = baij->B;
305   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
306   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307   MatScalar   *ba = b->a;
308 
309   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
310   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311   MatScalar *ap, *bap;
312 
313   /* for stash */
314   PetscInt   n_loc, *in_loc = NULL;
315   MatScalar *v_loc = NULL;
316 
317   PetscFunctionBegin;
318   if (!baij->donotstash) {
319     if (n > baij->n_loc) {
320       PetscCall(PetscFree(baij->in_loc));
321       PetscCall(PetscFree(baij->v_loc));
322       PetscCall(PetscMalloc1(n, &baij->in_loc));
323       PetscCall(PetscMalloc1(n, &baij->v_loc));
324 
325       baij->n_loc = n;
326     }
327     in_loc = baij->in_loc;
328     v_loc  = baij->v_loc;
329   }
330 
331   for (i = 0; i < m; i++) {
332     if (im[i] < 0) continue;
333     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335       row = im[i] - rstart_orig;                     /* local row index */
336       for (j = 0; j < n; j++) {
337         if (im[i] / bs > in[j] / bs) {
338           if (a->ignore_ltriangular) {
339             continue; /* ignore lower triangular blocks */
340           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341         }
342         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343           col  = in[j] - cstart_orig;                    /* local col index */
344           brow = row / bs;
345           bcol = col / bs;
346           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347           if (roworiented) value = v[i * n + j];
348           else value = v[i + j * m];
349           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351         } else if (in[j] < 0) {
352           continue;
353         } else {
354           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355           /* off-diag entry (B) */
356           if (mat->was_assembled) {
357             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358 #if defined(PETSC_USE_CTABLE)
359             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360             col = col - 1;
361 #else
362             col = baij->colmap[in[j] / bs] - 1;
363 #endif
364             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
365               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366               col = in[j];
367               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368               B     = baij->B;
369               b     = (Mat_SeqBAIJ *)(B)->data;
370               bimax = b->imax;
371               bi    = b->i;
372               bilen = b->ilen;
373               bj    = b->j;
374               ba    = b->a;
375             } else col += in[j] % bs;
376           } else col = in[j];
377           if (roworiented) value = v[i * n + j];
378           else value = v[i + j * m];
379           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381         }
382       }
383     } else { /* off processor entry */
384       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385       if (!baij->donotstash) {
386         mat->assembled = PETSC_FALSE;
387         n_loc          = 0;
388         for (j = 0; j < n; j++) {
389           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390           in_loc[n_loc] = in[j];
391           if (roworiented) {
392             v_loc[n_loc] = v[i * n + j];
393           } else {
394             v_loc[n_loc] = v[j * m + i];
395           }
396           n_loc++;
397         }
398         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399       }
400     }
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406 {
407   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
408   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411   PetscBool          roworiented = a->roworiented;
412   const PetscScalar *value       = v;
413   MatScalar         *ap, *aa = a->a, *bap;
414 
415   PetscFunctionBegin;
416   if (col < row) {
417     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419   }
420   rp    = aj + ai[row];
421   ap    = aa + bs2 * ai[row];
422   rmax  = imax[row];
423   nrow  = ailen[row];
424   value = v;
425   low   = 0;
426   high  = nrow;
427 
428   while (high - low > 7) {
429     t = (low + high) / 2;
430     if (rp[t] > col) high = t;
431     else low = t;
432   }
433   for (i = low; i < high; i++) {
434     if (rp[i] > col) break;
435     if (rp[i] == col) {
436       bap = ap + bs2 * i;
437       if (roworiented) {
438         if (is == ADD_VALUES) {
439           for (ii = 0; ii < bs; ii++) {
440             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441           }
442         } else {
443           for (ii = 0; ii < bs; ii++) {
444             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445           }
446         }
447       } else {
448         if (is == ADD_VALUES) {
449           for (ii = 0; ii < bs; ii++) {
450             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451           }
452         } else {
453           for (ii = 0; ii < bs; ii++) {
454             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455           }
456         }
457       }
458       goto noinsert2;
459     }
460   }
461   if (nonew == 1) goto noinsert2;
462   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464   N = nrow++ - 1;
465   high++;
466   /* shift up all the later entries in this row */
467   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469   rp[i] = col;
470   bap   = ap + bs2 * i;
471   if (roworiented) {
472     for (ii = 0; ii < bs; ii++) {
473       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474     }
475   } else {
476     for (ii = 0; ii < bs; ii++) {
477       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478     }
479   }
480 noinsert2:;
481   ailen[row] = nrow;
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 /*
486    This routine is exactly duplicated in mpibaij.c
487 */
488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
491   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494   PetscBool          roworiented = a->roworiented;
495   const PetscScalar *value       = v;
496   MatScalar         *ap, *aa = a->a, *bap;
497 
498   PetscFunctionBegin;
499   rp    = aj + ai[row];
500   ap    = aa + bs2 * ai[row];
501   rmax  = imax[row];
502   nrow  = ailen[row];
503   low   = 0;
504   high  = nrow;
505   value = v;
506   while (high - low > 7) {
507     t = (low + high) / 2;
508     if (rp[t] > col) high = t;
509     else low = t;
510   }
511   for (i = low; i < high; i++) {
512     if (rp[i] > col) break;
513     if (rp[i] == col) {
514       bap = ap + bs2 * i;
515       if (roworiented) {
516         if (is == ADD_VALUES) {
517           for (ii = 0; ii < bs; ii++) {
518             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519           }
520         } else {
521           for (ii = 0; ii < bs; ii++) {
522             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523           }
524         }
525       } else {
526         if (is == ADD_VALUES) {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529             bap += bs;
530           }
531         } else {
532           for (ii = 0; ii < bs; ii++, value += bs) {
533             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534             bap += bs;
535           }
536         }
537       }
538       goto noinsert2;
539     }
540   }
541   if (nonew == 1) goto noinsert2;
542   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544   N = nrow++ - 1;
545   high++;
546   /* shift up all the later entries in this row */
547   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549   rp[i] = col;
550   bap   = ap + bs2 * i;
551   if (roworiented) {
552     for (ii = 0; ii < bs; ii++) {
553       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554     }
555   } else {
556     for (ii = 0; ii < bs; ii++) {
557       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558     }
559   }
560 noinsert2:;
561   ailen[row] = nrow;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 /*
566     This routine could be optimized by removing the need for the block copy below and passing stride information
567   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568 */
569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570 {
571   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572   const MatScalar *value;
573   MatScalar       *barray      = baij->barray;
574   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
578 
579   PetscFunctionBegin;
580   if (!barray) {
581     PetscCall(PetscMalloc1(bs2, &barray));
582     baij->barray = barray;
583   }
584 
585   if (roworiented) {
586     stepval = (n - 1) * bs;
587   } else {
588     stepval = (m - 1) * bs;
589   }
590   for (i = 0; i < m; i++) {
591     if (im[i] < 0) continue;
592     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593     if (im[i] >= rstart && im[i] < rend) {
594       row = im[i] - rstart;
595       for (j = 0; j < n; j++) {
596         if (im[i] > in[j]) {
597           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599         }
600         /* If NumCol = 1 then a copy is not required */
601         if ((roworiented) && (n == 1)) {
602           barray = (MatScalar *)v + i * bs2;
603         } else if ((!roworiented) && (m == 1)) {
604           barray = (MatScalar *)v + j * bs2;
605         } else { /* Here a copy is required */
606           if (roworiented) {
607             value = v + i * (stepval + bs) * bs + j * bs;
608           } else {
609             value = v + j * (stepval + bs) * bs + i * bs;
610           }
611           for (ii = 0; ii < bs; ii++, value += stepval) {
612             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613           }
614           barray -= bs2;
615         }
616 
617         if (in[j] >= cstart && in[j] < cend) {
618           col = in[j] - cstart;
619           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620         } else if (in[j] < 0) {
621           continue;
622         } else {
623           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624           if (mat->was_assembled) {
625             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
626 
627 #if defined(PETSC_USE_DEBUG)
628   #if defined(PETSC_USE_CTABLE)
629             {
630               PetscInt data;
631               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633             }
634   #else
635             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636   #endif
637 #endif
638 #if defined(PETSC_USE_CTABLE)
639             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640             col = (col - 1) / bs;
641 #else
642             col = (baij->colmap[in[j]] - 1) / bs;
643 #endif
644             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645               PetscCall(MatDisAssemble_MPISBAIJ(mat));
646               col = in[j];
647             }
648           } else col = in[j];
649           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650         }
651       }
652     } else {
653       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654       if (!baij->donotstash) {
655         if (roworiented) {
656           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         } else {
658           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659         }
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667 {
668   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671 
672   PetscFunctionBegin;
673   for (i = 0; i < m; i++) {
674     if (idxm[i] < 0) continue; /* negative row */
675     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677       row = idxm[i] - bsrstart;
678       for (j = 0; j < n; j++) {
679         if (idxn[j] < 0) continue; /* negative column */
680         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682           col = idxn[j] - bscstart;
683           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684         } else {
685           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686 #if defined(PETSC_USE_CTABLE)
687           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688           data--;
689 #else
690           data = baij->colmap[idxn[j] / bs] - 1;
691 #endif
692           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693           else {
694             col = data + idxn[j] % bs;
695             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696           }
697         }
698       }
699     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705 {
706   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707   PetscReal     sum[2], *lnorm2;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, norm));
712   } else {
713     if (type == NORM_FROBENIUS) {
714       PetscCall(PetscMalloc1(2, &lnorm2));
715       PetscCall(MatNorm(baij->A, type, lnorm2));
716       *lnorm2 = (*lnorm2) * (*lnorm2);
717       lnorm2++; /* squar power of norm(A) */
718       PetscCall(MatNorm(baij->B, type, lnorm2));
719       *lnorm2 = (*lnorm2) * (*lnorm2);
720       lnorm2--; /* squar power of norm(B) */
721       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
723       PetscCall(PetscFree(lnorm2));
724     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
725       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
727       PetscReal    *rsum, *rsum2, vabs;
728       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730       MatScalar    *v;
731 
732       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734       /* Amat */
735       v  = amat->a;
736       jj = amat->j;
737       for (brow = 0; brow < mbs; brow++) {
738         grow = bs * (rstart + brow);
739         nz   = amat->i[brow + 1] - amat->i[brow];
740         for (bcol = 0; bcol < nz; bcol++) {
741           gcol = bs * (rstart + *jj);
742           jj++;
743           for (col = 0; col < bs; col++) {
744             for (row = 0; row < bs; row++) {
745               vabs = PetscAbsScalar(*v);
746               v++;
747               rsum[gcol + col] += vabs;
748               /* non-diagonal block */
749               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
750             }
751           }
752         }
753         PetscCall(PetscLogFlops(nz * bs * bs));
754       }
755       /* Bmat */
756       v  = bmat->a;
757       jj = bmat->j;
758       for (brow = 0; brow < mbs; brow++) {
759         grow = bs * (rstart + brow);
760         nz   = bmat->i[brow + 1] - bmat->i[brow];
761         for (bcol = 0; bcol < nz; bcol++) {
762           gcol = bs * garray[*jj];
763           jj++;
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               vabs = PetscAbsScalar(*v);
767               v++;
768               rsum[gcol + col] += vabs;
769               rsum[grow + row] += vabs;
770             }
771           }
772         }
773         PetscCall(PetscLogFlops(nz * bs * bs));
774       }
775       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776       *norm = 0.0;
777       for (col = 0; col < mat->cmap->N; col++) {
778         if (rsum2[col] > *norm) *norm = rsum2[col];
779       }
780       PetscCall(PetscFree2(rsum, rsum2));
781     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782   }
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   PetscInt      nstash, reallocs;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793 
794   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
799   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800   PetscFunctionReturn(PETSC_SUCCESS);
801 }
802 
803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804 {
805   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
807   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808   PetscInt     *row, *col;
809   PetscBool     other_disassembled;
810   PetscMPIInt   n;
811   PetscBool     r1, r2, r3;
812   MatScalar    *val;
813 
814   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815   PetscFunctionBegin;
816   if (!baij->donotstash && !mat->nooffprocentries) {
817     while (1) {
818       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819       if (!flg) break;
820 
821       for (i = 0; i < n;) {
822         /* Now identify the consecutive vals belonging to the same row */
823         for (j = i, rstart = row[j]; j < n; j++) {
824           if (row[j] != rstart) break;
825         }
826         if (j < n) ncols = j - i;
827         else ncols = n - i;
828         /* Now assemble all these values with a single function call */
829         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830         i = j;
831       }
832     }
833     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834     /* Now process the block-stash. Since the values are stashed column-oriented,
835        set the roworiented flag to column oriented, and after MatSetValues()
836        restore the original flags */
837     r1 = baij->roworiented;
838     r2 = a->roworiented;
839     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
840 
841     baij->roworiented = PETSC_FALSE;
842     a->roworiented    = PETSC_FALSE;
843 
844     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845     while (1) {
846       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847       if (!flg) break;
848 
849       for (i = 0; i < n;) {
850         /* Now identify the consecutive vals belonging to the same row */
851         for (j = i, rstart = row[j]; j < n; j++) {
852           if (row[j] != rstart) break;
853         }
854         if (j < n) ncols = j - i;
855         else ncols = n - i;
856         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857         i = j;
858       }
859     }
860     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
861 
862     baij->roworiented = r1;
863     a->roworiented    = r2;
864 
865     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866   }
867 
868   PetscCall(MatAssemblyBegin(baij->A, mode));
869   PetscCall(MatAssemblyEnd(baij->A, mode));
870 
871   /* determine if any processor has disassembled, if so we must
872      also disassemble ourselves, in order that we may reassemble. */
873   /*
874      if nonzero structure of submatrix B cannot change then we know that
875      no processor disassembled thus we can skip this stuff
876   */
877   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
878     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
879     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880   }
881 
882   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
883   PetscCall(MatAssemblyBegin(baij->B, mode));
884   PetscCall(MatAssemblyEnd(baij->B, mode));
885 
886   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
887 
888   baij->rowvalues = NULL;
889 
890   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
891   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
893     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899 #include <petscdraw.h>
900 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901 {
902   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903   PetscInt          bs   = mat->rmap->bs;
904   PetscMPIInt       rank = baij->rank;
905   PetscBool         iascii, isdraw;
906   PetscViewer       sviewer;
907   PetscViewerFormat format;
908 
909   PetscFunctionBegin;
910   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
911   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
912   if (iascii) {
913     PetscCall(PetscViewerGetFormat(viewer, &format));
914     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915       MatInfo info;
916       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
917       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
918       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
919       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
920                                                    mat->rmap->bs, (double)info.memory));
921       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
924       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
925       PetscCall(PetscViewerFlush(viewer));
926       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
927       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
928       PetscCall(VecScatterView(baij->Mvctx, viewer));
929       PetscFunctionReturn(PETSC_SUCCESS);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
932       PetscFunctionReturn(PETSC_SUCCESS);
933     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
934       PetscFunctionReturn(PETSC_SUCCESS);
935     }
936   }
937 
938   if (isdraw) {
939     PetscDraw draw;
940     PetscBool isnull;
941     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
942     PetscCall(PetscDrawIsNull(draw, &isnull));
943     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944   }
945 
946   {
947     /* assemble the entire matrix onto first processor. */
948     Mat           A;
949     Mat_SeqSBAIJ *Aloc;
950     Mat_SeqBAIJ  *Bloc;
951     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952     MatScalar    *a;
953     const char   *matname;
954 
955     /* Should this be the same type as mat? */
956     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957     if (rank == 0) {
958       PetscCall(MatSetSizes(A, M, N, M, N));
959     } else {
960       PetscCall(MatSetSizes(A, 0, 0, M, N));
961     }
962     PetscCall(MatSetType(A, MATMPISBAIJ));
963     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
964     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
968     ai   = Aloc->i;
969     aj   = Aloc->j;
970     a    = Aloc->a;
971     PetscCall(PetscMalloc1(bs, &rvals));
972 
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = (baij->cstartbs + aj[j]) * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     /* copy over the B part */
986     Bloc = (Mat_SeqBAIJ *)baij->B->data;
987     ai   = Bloc->i;
988     aj   = Bloc->j;
989     a    = Bloc->a;
990     for (i = 0; i < mbs; i++) {
991       rvals[0] = bs * (baij->rstartbs + i);
992       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993       for (j = ai[i]; j < ai[i + 1]; j++) {
994         col = baij->garray[aj[j]] * bs;
995         for (k = 0; k < bs; k++) {
996           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
997           col++;
998           a += bs;
999         }
1000       }
1001     }
1002     PetscCall(PetscFree(rvals));
1003     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1004     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005     /*
1006        Everyone has to call to draw the matrix since the graphics waits are
1007        synchronized across all processors that share the PetscDraw object
1008     */
1009     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1010     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011     if (rank == 0) {
1012       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1013       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014     }
1015     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1016     PetscCall(PetscViewerFlush(viewer));
1017     PetscCall(MatDestroy(&A));
1018   }
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024 
1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026 {
1027   PetscBool iascii, isdraw, issocket, isbinary;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034   if (iascii || isdraw || issocket) {
1035     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041 {
1042   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1044   PetscScalar       *from;
1045   const PetscScalar *x;
1046 
1047   PetscFunctionBegin;
1048   /* diagonal part */
1049   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050   PetscCall(VecSet(a->slvec1b, 0.0));
1051 
1052   /* subdiagonal part */
1053   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055 
1056   /* copy x into the vec slvec0 */
1057   PetscCall(VecGetArray(a->slvec0, &from));
1058   PetscCall(VecGetArrayRead(xx, &x));
1059 
1060   PetscCall(PetscArraycpy(from, x, bs * mbs));
1061   PetscCall(VecRestoreArray(a->slvec0, &from));
1062   PetscCall(VecRestoreArrayRead(xx, &x));
1063 
1064   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066   /* supperdiagonal part */
1067   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075   PetscScalar       *from;
1076   const PetscScalar *x;
1077 
1078   PetscFunctionBegin;
1079   /* diagonal part */
1080   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081   PetscCall(VecSet(a->slvec1b, 0.0));
1082 
1083   /* subdiagonal part */
1084   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1085 
1086   /* copy x into the vec slvec0 */
1087   PetscCall(VecGetArray(a->slvec0, &from));
1088   PetscCall(VecGetArrayRead(xx, &x));
1089 
1090   PetscCall(PetscArraycpy(from, x, bs * mbs));
1091   PetscCall(VecRestoreArray(a->slvec0, &from));
1092   PetscCall(VecRestoreArrayRead(xx, &x));
1093 
1094   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1095   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096   /* supperdiagonal part */
1097   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1102 {
1103   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1104   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1105   PetscScalar       *from, zero       = 0.0;
1106   const PetscScalar *x;
1107 
1108   PetscFunctionBegin;
1109   /* diagonal part */
1110   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1111   PetscCall(VecSet(a->slvec1b, zero));
1112 
1113   /* subdiagonal part */
1114   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1115   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1116 
1117   /* copy x into the vec slvec0 */
1118   PetscCall(VecGetArray(a->slvec0, &from));
1119   PetscCall(VecGetArrayRead(xx, &x));
1120   PetscCall(PetscArraycpy(from, x, bs * mbs));
1121   PetscCall(VecRestoreArray(a->slvec0, &from));
1122 
1123   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1124   PetscCall(VecRestoreArrayRead(xx, &x));
1125   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1126 
1127   /* supperdiagonal part */
1128   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133 {
1134   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1135   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1136   PetscScalar       *from, zero       = 0.0;
1137   const PetscScalar *x;
1138 
1139   PetscFunctionBegin;
1140   /* diagonal part */
1141   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1142   PetscCall(VecSet(a->slvec1b, zero));
1143 
1144   /* subdiagonal part */
1145   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1146 
1147   /* copy x into the vec slvec0 */
1148   PetscCall(VecGetArray(a->slvec0, &from));
1149   PetscCall(VecGetArrayRead(xx, &x));
1150   PetscCall(PetscArraycpy(from, x, bs * mbs));
1151   PetscCall(VecRestoreArray(a->slvec0, &from));
1152 
1153   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1154   PetscCall(VecRestoreArrayRead(xx, &x));
1155   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1156 
1157   /* supperdiagonal part */
1158   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1167 {
1168   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1172   PetscCall(MatGetDiagonal(a->A, v));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1177 {
1178   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1179 
1180   PetscFunctionBegin;
1181   PetscCall(MatScale(a->A, aa));
1182   PetscCall(MatScale(a->B, aa));
1183   PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185 
1186 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1187 {
1188   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1189   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1190   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1191   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1192   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1193 
1194   PetscFunctionBegin;
1195   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1196   mat->getrowactive = PETSC_TRUE;
1197 
1198   if (!mat->rowvalues && (idx || v)) {
1199     /*
1200         allocate enough space to hold information from the longest row.
1201     */
1202     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1203     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1204     PetscInt      max = 1, mbs = mat->mbs, tmp;
1205     for (i = 0; i < mbs; i++) {
1206       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1207       if (max < tmp) max = tmp;
1208     }
1209     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1210   }
1211 
1212   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1213   lrow = row - brstart; /* local row index */
1214 
1215   pvA = &vworkA;
1216   pcA = &cworkA;
1217   pvB = &vworkB;
1218   pcB = &cworkB;
1219   if (!v) {
1220     pvA = NULL;
1221     pvB = NULL;
1222   }
1223   if (!idx) {
1224     pcA = NULL;
1225     if (!v) pcB = NULL;
1226   }
1227   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1228   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1229   nztot = nzA + nzB;
1230 
1231   cmap = mat->garray;
1232   if (v || idx) {
1233     if (nztot) {
1234       /* Sort by increasing column numbers, assuming A and B already sorted */
1235       PetscInt imark = -1;
1236       if (v) {
1237         *v = v_p = mat->rowvalues;
1238         for (i = 0; i < nzB; i++) {
1239           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1240           else break;
1241         }
1242         imark = i;
1243         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1244         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1245       }
1246       if (idx) {
1247         *idx = idx_p = mat->rowindices;
1248         if (imark > -1) {
1249           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250         } else {
1251           for (i = 0; i < nzB; i++) {
1252             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253             else break;
1254           }
1255           imark = i;
1256         }
1257         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1258         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259       }
1260     } else {
1261       if (idx) *idx = NULL;
1262       if (v) *v = NULL;
1263     }
1264   }
1265   *nz = nztot;
1266   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1267   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1272 {
1273   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1274 
1275   PetscFunctionBegin;
1276   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1277   baij->getrowactive = PETSC_FALSE;
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_TRUE;
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1293   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1294 
1295   PetscFunctionBegin;
1296   aA->getrow_utriangular = PETSC_FALSE;
1297   PetscFunctionReturn(PETSC_SUCCESS);
1298 }
1299 
1300 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301 {
1302   PetscFunctionBegin;
1303   if (PetscDefined(USE_COMPLEX)) {
1304     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1305 
1306     PetscCall(MatConjugate(a->A));
1307     PetscCall(MatConjugate(a->B));
1308   }
1309   PetscFunctionReturn(PETSC_SUCCESS);
1310 }
1311 
1312 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1315 
1316   PetscFunctionBegin;
1317   PetscCall(MatRealPart(a->A));
1318   PetscCall(MatRealPart(a->B));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1325 
1326   PetscFunctionBegin;
1327   PetscCall(MatImaginaryPart(a->A));
1328   PetscCall(MatImaginaryPart(a->B));
1329   PetscFunctionReturn(PETSC_SUCCESS);
1330 }
1331 
1332 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333    Input: isrow       - distributed(parallel),
1334           iscol_local - locally owned (seq)
1335 */
1336 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1337 {
1338   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1339   const PetscInt *ptr1, *ptr2;
1340 
1341   PetscFunctionBegin;
1342   PetscCall(ISGetLocalSize(isrow, &sz1));
1343   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1344   if (sz1 > sz2) {
1345     *flg = PETSC_FALSE;
1346     PetscFunctionReturn(PETSC_SUCCESS);
1347   }
1348 
1349   PetscCall(ISGetIndices(isrow, &ptr1));
1350   PetscCall(ISGetIndices(iscol_local, &ptr2));
1351 
1352   PetscCall(PetscMalloc1(sz1, &a1));
1353   PetscCall(PetscMalloc1(sz2, &a2));
1354   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1355   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1356   PetscCall(PetscSortInt(sz1, a1));
1357   PetscCall(PetscSortInt(sz2, a2));
1358 
1359   nmatch = 0;
1360   k      = 0;
1361   for (i = 0; i < sz1; i++) {
1362     for (j = k; j < sz2; j++) {
1363       if (a1[i] == a2[j]) {
1364         k = j;
1365         nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow, &ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(PETSC_SUCCESS);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1383 {
1384   IS        iscol_local;
1385   PetscInt  csize;
1386   PetscBool isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol, &csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1392     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol, &iscol_local));
1397     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1422 {
1423   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1424   Mat            A = a->A, B = a->B;
1425   PetscLogDouble isend[5], irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1431 
1432   isend[0] = info->nz_used;
1433   isend[1] = info->nz_allocated;
1434   isend[2] = info->nz_unneeded;
1435   isend[3] = info->memory;
1436   isend[4] = info->mallocs;
1437 
1438   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1439 
1440   isend[0] += info->nz_used;
1441   isend[1] += info->nz_allocated;
1442   isend[2] += info->nz_unneeded;
1443   isend[3] += info->memory;
1444   isend[4] += info->mallocs;
1445   if (flag == MAT_LOCAL) {
1446     info->nz_used      = isend[0];
1447     info->nz_allocated = isend[1];
1448     info->nz_unneeded  = isend[2];
1449     info->memory       = isend[3];
1450     info->mallocs      = isend[4];
1451   } else if (flag == MAT_GLOBAL_MAX) {
1452     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1453 
1454     info->nz_used      = irecv[0];
1455     info->nz_allocated = irecv[1];
1456     info->nz_unneeded  = irecv[2];
1457     info->memory       = irecv[3];
1458     info->mallocs      = irecv[4];
1459   } else if (flag == MAT_GLOBAL_SUM) {
1460     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1461 
1462     info->nz_used      = irecv[0];
1463     info->nz_allocated = irecv[1];
1464     info->nz_unneeded  = irecv[2];
1465     info->memory       = irecv[3];
1466     info->mallocs      = irecv[4];
1467   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1468   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1469   info->fill_ratio_needed = 0;
1470   info->factor_mallocs    = 0;
1471   PetscFunctionReturn(PETSC_SUCCESS);
1472 }
1473 
1474 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1475 {
1476   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1477   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1478 
1479   PetscFunctionBegin;
1480   switch (op) {
1481   case MAT_NEW_NONZERO_LOCATIONS:
1482   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1483   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1484   case MAT_KEEP_NONZERO_PATTERN:
1485   case MAT_SUBMAT_SINGLEIS:
1486   case MAT_NEW_NONZERO_LOCATION_ERR:
1487     MatCheckPreallocated(A, 1);
1488     PetscCall(MatSetOption(a->A, op, flg));
1489     PetscCall(MatSetOption(a->B, op, flg));
1490     break;
1491   case MAT_ROW_ORIENTED:
1492     MatCheckPreallocated(A, 1);
1493     a->roworiented = flg;
1494 
1495     PetscCall(MatSetOption(a->A, op, flg));
1496     PetscCall(MatSetOption(a->B, op, flg));
1497     break;
1498   case MAT_FORCE_DIAGONAL_ENTRIES:
1499   case MAT_SORTED_FULL:
1500     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1501     break;
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503     a->donotstash = flg;
1504     break;
1505   case MAT_USE_HASH_TABLE:
1506     a->ht_flag = flg;
1507     break;
1508   case MAT_HERMITIAN:
1509     MatCheckPreallocated(A, 1);
1510     PetscCall(MatSetOption(a->A, op, flg));
1511 #if defined(PETSC_USE_COMPLEX)
1512     if (flg) { /* need different mat-vec ops */
1513       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1514       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1515       A->ops->multtranspose    = NULL;
1516       A->ops->multtransposeadd = NULL;
1517       A->symmetric             = PETSC_BOOL3_FALSE;
1518     }
1519 #endif
1520     break;
1521   case MAT_SPD:
1522   case MAT_SYMMETRIC:
1523     MatCheckPreallocated(A, 1);
1524     PetscCall(MatSetOption(a->A, op, flg));
1525 #if defined(PETSC_USE_COMPLEX)
1526     if (flg) { /* restore to use default mat-vec ops */
1527       A->ops->mult             = MatMult_MPISBAIJ;
1528       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1529       A->ops->multtranspose    = MatMult_MPISBAIJ;
1530       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1531     }
1532 #endif
1533     break;
1534   case MAT_STRUCTURALLY_SYMMETRIC:
1535     MatCheckPreallocated(A, 1);
1536     PetscCall(MatSetOption(a->A, op, flg));
1537     break;
1538   case MAT_SYMMETRY_ETERNAL:
1539   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1540     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1541     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1542     break;
1543   case MAT_SPD_ETERNAL:
1544     break;
1545   case MAT_IGNORE_LOWER_TRIANGULAR:
1546     aA->ignore_ltriangular = flg;
1547     break;
1548   case MAT_ERROR_LOWER_TRIANGULAR:
1549     aA->ignore_ltriangular = flg;
1550     break;
1551   case MAT_GETROW_UPPERTRIANGULAR:
1552     aA->getrow_utriangular = flg;
1553     break;
1554   default:
1555     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1556   }
1557   PetscFunctionReturn(PETSC_SUCCESS);
1558 }
1559 
1560 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1561 {
1562   PetscFunctionBegin;
1563   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1564   if (reuse == MAT_INITIAL_MATRIX) {
1565     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1566   } else if (reuse == MAT_REUSE_MATRIX) {
1567     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1568   }
1569   PetscFunctionReturn(PETSC_SUCCESS);
1570 }
1571 
1572 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1573 {
1574   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1575   Mat           a = baij->A, b = baij->B;
1576   PetscInt      nv, m, n;
1577   PetscBool     flg;
1578 
1579   PetscFunctionBegin;
1580   if (ll != rr) {
1581     PetscCall(VecEqual(ll, rr, &flg));
1582     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1583   }
1584   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1585 
1586   PetscCall(MatGetLocalSize(mat, &m, &n));
1587   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1588 
1589   PetscCall(VecGetLocalSize(rr, &nv));
1590   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1591 
1592   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1593 
1594   /* left diagonalscale the off-diagonal part */
1595   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596 
1597   /* scale the diagonal part */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   /* right diagonalscale the off-diagonal part */
1601   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1607 {
1608   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatSetUnfactored(a->A));
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1616 
1617 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1618 {
1619   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1620   Mat           a, b, c, d;
1621   PetscBool     flg;
1622 
1623   PetscFunctionBegin;
1624   a = matA->A;
1625   b = matA->B;
1626   c = matB->A;
1627   d = matB->B;
1628 
1629   PetscCall(MatEqual(a, c, &flg));
1630   if (flg) PetscCall(MatEqual(b, d, &flg));
1631   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1636 {
1637   PetscBool isbaij;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1641   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1642   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1643   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1644     PetscCall(MatGetRowUpperTriangular(A));
1645     PetscCall(MatCopy_Basic(A, B, str));
1646     PetscCall(MatRestoreRowUpperTriangular(A));
1647   } else {
1648     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1649     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1650 
1651     PetscCall(MatCopy(a->A, b->A, str));
1652     PetscCall(MatCopy(a->B, b->B, str));
1653   }
1654   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659 {
1660   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1661   PetscBLASInt  bnz, one                          = 1;
1662   Mat_SeqSBAIJ *xa, *ya;
1663   Mat_SeqBAIJ  *xb, *yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1669     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1670     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1672     xb = (Mat_SeqBAIJ *)xx->B->data;
1673     yb = (Mat_SeqBAIJ *)yy->B->data;
1674     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1676     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1678     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1679     PetscCall(MatAXPY_Basic(Y, a, X, str));
1680     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1681   } else {
1682     Mat       B;
1683     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1684     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1685     PetscCall(MatGetRowUpperTriangular(X));
1686     PetscCall(MatGetRowUpperTriangular(Y));
1687     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1688     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1689     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1690     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1691     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1692     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1693     PetscCall(MatSetType(B, MATMPISBAIJ));
1694     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1695     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1696     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1697     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1698     PetscCall(MatHeaderMerge(Y, &B));
1699     PetscCall(PetscFree(nnz_d));
1700     PetscCall(PetscFree(nnz_o));
1701     PetscCall(MatRestoreRowUpperTriangular(X));
1702     PetscCall(MatRestoreRowUpperTriangular(Y));
1703   }
1704   PetscFunctionReturn(PETSC_SUCCESS);
1705 }
1706 
1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708 {
1709   PetscInt  i;
1710   PetscBool flg;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714   for (i = 0; i < n; i++) {
1715     PetscCall(ISEqual(irow[i], icol[i], &flg));
1716     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1717   }
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722 {
1723   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1724   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1725 
1726   PetscFunctionBegin;
1727   if (!Y->preallocated) {
1728     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1729   } else if (!aij->nz) {
1730     PetscInt nonew = aij->nonew;
1731     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732     aij->nonew = nonew;
1733   }
1734   PetscCall(MatShift_Basic(Y, a));
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739 {
1740   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1744   PetscCall(MatMissingDiagonal(a->A, missing, d));
1745   if (d) {
1746     PetscInt rstart;
1747     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1748     *d += rstart / A->rmap->bs;
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754 {
1755   PetscFunctionBegin;
1756   *a = ((Mat_MPISBAIJ *)A->data)->A;
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 /* -------------------------------------------------------------------*/
1761 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1762                                        MatGetRow_MPISBAIJ,
1763                                        MatRestoreRow_MPISBAIJ,
1764                                        MatMult_MPISBAIJ,
1765                                        /*  4*/ MatMultAdd_MPISBAIJ,
1766                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1767                                        MatMultAdd_MPISBAIJ,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        /* 10*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatSOR_MPISBAIJ,
1775                                        MatTranspose_MPISBAIJ,
1776                                        /* 15*/ MatGetInfo_MPISBAIJ,
1777                                        MatEqual_MPISBAIJ,
1778                                        MatGetDiagonal_MPISBAIJ,
1779                                        MatDiagonalScale_MPISBAIJ,
1780                                        MatNorm_MPISBAIJ,
1781                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1782                                        MatAssemblyEnd_MPISBAIJ,
1783                                        MatSetOption_MPISBAIJ,
1784                                        MatZeroEntries_MPISBAIJ,
1785                                        /* 24*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        /* 29*/ MatSetUp_MPI_Hash,
1791                                        NULL,
1792                                        NULL,
1793                                        MatGetDiagonalBlock_MPISBAIJ,
1794                                        NULL,
1795                                        /* 34*/ MatDuplicate_MPISBAIJ,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        /* 39*/ MatAXPY_MPISBAIJ,
1801                                        MatCreateSubMatrices_MPISBAIJ,
1802                                        MatIncreaseOverlap_MPISBAIJ,
1803                                        MatGetValues_MPISBAIJ,
1804                                        MatCopy_MPISBAIJ,
1805                                        /* 44*/ NULL,
1806                                        MatScale_MPISBAIJ,
1807                                        MatShift_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                        /* 49*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        /* 54*/ NULL,
1816                                        NULL,
1817                                        MatSetUnfactored_MPISBAIJ,
1818                                        NULL,
1819                                        MatSetValuesBlocked_MPISBAIJ,
1820                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        /* 64*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1831                                        NULL,
1832                                        MatConvert_MPISBAIJ_Basic,
1833                                        NULL,
1834                                        NULL,
1835                                        /* 74*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /* 79*/ NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        MatLoad_MPISBAIJ,
1845                                        /* 84*/ NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 89*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        /* 94*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        /* 99*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        MatConjugate_MPISBAIJ,
1864                                        NULL,
1865                                        /*104*/ NULL,
1866                                        MatRealPart_MPISBAIJ,
1867                                        MatImaginaryPart_MPISBAIJ,
1868                                        MatGetRowUpperTriangular_MPISBAIJ,
1869                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1870                                        /*109*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        MatMissingDiagonal_MPISBAIJ,
1875                                        /*114*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        /*119*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        /*124*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        /*129*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        /*134*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /*139*/ MatSetBlockSizes_Default,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        /*150*/ NULL,
1912                                        NULL};
1913 
1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915 {
1916   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917   PetscInt      i, mbs, Mbs;
1918   PetscMPIInt   size;
1919 
1920   PetscFunctionBegin;
1921   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1922   PetscCall(PetscLayoutSetUp(B->rmap));
1923   PetscCall(PetscLayoutSetUp(B->cmap));
1924   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1925   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1926   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1927 
1928   mbs = B->rmap->n / bs;
1929   Mbs = B->rmap->N / bs;
1930   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1931 
1932   B->rmap->bs = bs;
1933   b->bs2      = bs * bs;
1934   b->mbs      = mbs;
1935   b->Mbs      = Mbs;
1936   b->nbs      = B->cmap->n / bs;
1937   b->Nbs      = B->cmap->N / bs;
1938 
1939   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1940   b->rstartbs = B->rmap->rstart / bs;
1941   b->rendbs   = B->rmap->rend / bs;
1942 
1943   b->cstartbs = B->cmap->rstart / bs;
1944   b->cendbs   = B->cmap->rend / bs;
1945 
1946 #if defined(PETSC_USE_CTABLE)
1947   PetscCall(PetscHMapIDestroy(&b->colmap));
1948 #else
1949   PetscCall(PetscFree(b->colmap));
1950 #endif
1951   PetscCall(PetscFree(b->garray));
1952   PetscCall(VecDestroy(&b->lvec));
1953   PetscCall(VecScatterDestroy(&b->Mvctx));
1954   PetscCall(VecDestroy(&b->slvec0));
1955   PetscCall(VecDestroy(&b->slvec0b));
1956   PetscCall(VecDestroy(&b->slvec1));
1957   PetscCall(VecDestroy(&b->slvec1a));
1958   PetscCall(VecDestroy(&b->slvec1b));
1959   PetscCall(VecScatterDestroy(&b->sMvctx));
1960 
1961   /* Because the B will have been resized we simply destroy it and create a new one each time */
1962   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1963   PetscCall(MatDestroy(&b->B));
1964   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1965   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1966   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1967 
1968   if (!B->preallocated) {
1969     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1970     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1971     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1972     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1973   }
1974 
1975   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1976   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1977 
1978   B->preallocated  = PETSC_TRUE;
1979   B->was_assembled = PETSC_FALSE;
1980   B->assembled     = PETSC_FALSE;
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1985 {
1986   PetscInt        m, rstart, cend;
1987   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1988   const PetscInt *JJ          = NULL;
1989   PetscScalar    *values      = NULL;
1990   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1991   PetscBool       nooffprocentries;
1992 
1993   PetscFunctionBegin;
1994   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1995   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1996   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1997   PetscCall(PetscLayoutSetUp(B->rmap));
1998   PetscCall(PetscLayoutSetUp(B->cmap));
1999   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2000   m      = B->rmap->n / bs;
2001   rstart = B->rmap->rstart / bs;
2002   cend   = B->cmap->rend / bs;
2003 
2004   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2005   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2006   for (i = 0; i < m; i++) {
2007     nz = ii[i + 1] - ii[i];
2008     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2009     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2010     JJ = jj + ii[i];
2011     bd = 0;
2012     for (j = 0; j < nz; j++) {
2013       if (*JJ >= i + rstart) break;
2014       JJ++;
2015       bd++;
2016     }
2017     d = 0;
2018     for (; j < nz; j++) {
2019       if (*JJ++ >= cend) break;
2020       d++;
2021     }
2022     d_nnz[i] = d;
2023     o_nnz[i] = nz - d - bd;
2024     nz       = nz - bd;
2025     nz_max   = PetscMax(nz_max, nz);
2026   }
2027   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2028   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2029   PetscCall(PetscFree2(d_nnz, o_nnz));
2030 
2031   values = (PetscScalar *)V;
2032   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2033   for (i = 0; i < m; i++) {
2034     PetscInt        row   = i + rstart;
2035     PetscInt        ncols = ii[i + 1] - ii[i];
2036     const PetscInt *icols = jj + ii[i];
2037     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2038       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2039       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2040     } else { /* block ordering does not match so we can only insert one block at a time. */
2041       PetscInt j;
2042       for (j = 0; j < ncols; j++) {
2043         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2044         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2045       }
2046     }
2047   }
2048 
2049   if (!V) PetscCall(PetscFree(values));
2050   nooffprocentries    = B->nooffprocentries;
2051   B->nooffprocentries = PETSC_TRUE;
2052   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2053   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2054   B->nooffprocentries = nooffprocentries;
2055 
2056   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2057   PetscFunctionReturn(PETSC_SUCCESS);
2058 }
2059 
2060 /*MC
2061    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063    the matrix is stored.
2064 
2065    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2066    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2067 
2068    Options Database Keys:
2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2070 
2071    Note:
2072      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2073      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2074 
2075    Level: beginner
2076 
2077 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2078 M*/
2079 
2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2081 {
2082   Mat_MPISBAIJ *b;
2083   PetscBool     flg = PETSC_FALSE;
2084 
2085   PetscFunctionBegin;
2086   PetscCall(PetscNew(&b));
2087   B->data = (void *)b;
2088   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2089 
2090   B->ops->destroy = MatDestroy_MPISBAIJ;
2091   B->ops->view    = MatView_MPISBAIJ;
2092   B->assembled    = PETSC_FALSE;
2093   B->insertmode   = NOT_SET_VALUES;
2094 
2095   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2096   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2097 
2098   /* build local table of row and column ownerships */
2099   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2100 
2101   /* build cache for off array entries formed */
2102   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2103 
2104   b->donotstash  = PETSC_FALSE;
2105   b->colmap      = NULL;
2106   b->garray      = NULL;
2107   b->roworiented = PETSC_TRUE;
2108 
2109   /* stuff used in block assembly */
2110   b->barray = NULL;
2111 
2112   /* stuff used for matrix vector multiply */
2113   b->lvec    = NULL;
2114   b->Mvctx   = NULL;
2115   b->slvec0  = NULL;
2116   b->slvec0b = NULL;
2117   b->slvec1  = NULL;
2118   b->slvec1a = NULL;
2119   b->slvec1b = NULL;
2120   b->sMvctx  = NULL;
2121 
2122   /* stuff for MatGetRow() */
2123   b->rowindices   = NULL;
2124   b->rowvalues    = NULL;
2125   b->getrowactive = PETSC_FALSE;
2126 
2127   /* hash table stuff */
2128   b->ht           = NULL;
2129   b->hd           = NULL;
2130   b->ht_size      = 0;
2131   b->ht_flag      = PETSC_FALSE;
2132   b->ht_fact      = 0;
2133   b->ht_total_ct  = 0;
2134   b->ht_insert_ct = 0;
2135 
2136   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2137   b->ijonly = PETSC_FALSE;
2138 
2139   b->in_loc = NULL;
2140   b->v_loc  = NULL;
2141   b->n_loc  = 0;
2142 
2143   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2147 #if defined(PETSC_HAVE_ELEMENTAL)
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2149 #endif
2150 #if defined(PETSC_HAVE_SCALAPACK)
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2152 #endif
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2155 
2156   B->symmetric                   = PETSC_BOOL3_TRUE;
2157   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2158   B->symmetry_eternal            = PETSC_TRUE;
2159   B->structural_symmetry_eternal = PETSC_TRUE;
2160 #if defined(PETSC_USE_COMPLEX)
2161   B->hermitian = PETSC_BOOL3_FALSE;
2162 #else
2163   B->hermitian = PETSC_BOOL3_TRUE;
2164 #endif
2165 
2166   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2167   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2168   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2169   if (flg) {
2170     PetscReal fact = 1.39;
2171     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2172     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2173     if (fact <= 1.0) fact = 1.39;
2174     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2175     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2176   }
2177   PetscOptionsEnd();
2178   PetscFunctionReturn(PETSC_SUCCESS);
2179 }
2180 
2181 /*MC
2182    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2183 
2184    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2185    and `MATMPISBAIJ` otherwise.
2186 
2187    Options Database Key:
2188 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2189 
2190   Level: beginner
2191 
2192 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2193 M*/
2194 
2195 /*@C
2196    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2197    the user should preallocate the matrix storage by setting the parameters
2198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199    performance can be increased by more than a factor of 50.
2200 
2201    Collective
2202 
2203    Input Parameters:
2204 +  B - the matrix
2205 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2206           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2207 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2208            submatrix  (same for all local rows)
2209 .  d_nnz - array containing the number of block nonzeros in the various block rows
2210            in the upper triangular and diagonal part of the in diagonal portion of the local
2211            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2212            for the diagonal entry and set a value even if it is zero.
2213 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2214            submatrix (same for all local rows).
2215 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2216            off-diagonal portion of the local submatrix that is right of the diagonal
2217            (possibly different for each block row) or NULL.
2218 
2219    Options Database Keys:
2220 +   -mat_no_unroll - uses code that does not unroll the loops in the
2221                      block calculations (much slower)
2222 -   -mat_block_size - size of the blocks to use
2223 
2224    Notes:
2225 
2226    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2227    than it must be used on all processors that share the object for that argument.
2228 
2229    If the *_nnz parameter is given then the *_nz parameter is ignored
2230 
2231    Storage Information:
2232    For a square global matrix we define each processor's diagonal portion
2233    to be its local rows and the corresponding columns (a square submatrix);
2234    each processor's off-diagonal portion encompasses the remainder of the
2235    local matrix (a rectangular submatrix).
2236 
2237    The user can specify preallocated storage for the diagonal part of
2238    the local submatrix with either d_nz or d_nnz (not both).  Set
2239    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2240    memory allocation.  Likewise, specify preallocated storage for the
2241    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2242 
2243    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2244    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2245    You can also run with the option -info and look for messages with the string
2246    malloc in them to see if additional memory allocation was needed.
2247 
2248    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2249    the figure below we depict these three local rows and all columns (0-11).
2250 
2251 .vb
2252            0 1 2 3 4 5 6 7 8 9 10 11
2253           --------------------------
2254    row 3  |. . . d d d o o o o  o  o
2255    row 4  |. . . d d d o o o o  o  o
2256    row 5  |. . . d d d o o o o  o  o
2257           --------------------------
2258 .ve
2259 
2260    Thus, any entries in the d locations are stored in the d (diagonal)
2261    submatrix, and any entries in the o locations are stored in the
2262    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2263    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2264 
2265    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2266    plus the diagonal part of the d matrix,
2267    and o_nz should indicate the number of block nonzeros per row in the o matrix
2268 
2269    In general, for PDE problems in which most nonzeros are near the diagonal,
2270    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2271    or you will get TERRIBLE performance; see the users' manual chapter on
2272    matrices.
2273 
2274    Level: intermediate
2275 
2276 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2277 @*/
2278 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2279 {
2280   PetscFunctionBegin;
2281   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2282   PetscValidType(B, 1);
2283   PetscValidLogicalCollectiveInt(B, bs, 2);
2284   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2285   PetscFunctionReturn(PETSC_SUCCESS);
2286 }
2287 
2288 /*@C
2289    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2290    (block compressed row).  For good matrix assembly performance
2291    the user should preallocate the matrix storage by setting the parameters
2292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293    performance can be increased by more than a factor of 50.
2294 
2295    Collective
2296 
2297    Input Parameters:
2298 +  comm - MPI communicator
2299 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2300           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2301 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302            This value should be the same as the local size used in creating the
2303            y vector for the matrix-vector product y = Ax.
2304 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2305            This value should be the same as the local size used in creating the
2306            x vector for the matrix-vector product y = Ax.
2307 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2308 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2309 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2310            submatrix (same for all local rows)
2311 .  d_nnz - array containing the number of block nonzeros in the various block rows
2312            in the upper triangular portion of the in diagonal portion of the local
2313            (possibly different for each block block row) or NULL.
2314            If you plan to factor the matrix you must leave room for the diagonal entry and
2315            set its value even if it is zero.
2316 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2317            submatrix (same for all local rows).
2318 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2319            off-diagonal portion of the local submatrix (possibly different for
2320            each block row) or NULL.
2321 
2322    Output Parameter:
2323 .  A - the matrix
2324 
2325    Options Database Keys:
2326 +   -mat_no_unroll - uses code that does not unroll the loops in the
2327                      block calculations (much slower)
2328 .   -mat_block_size - size of the blocks to use
2329 -   -mat_mpi - use the parallel matrix data structures even on one processor
2330                (defaults to using SeqBAIJ format on one processor)
2331 
2332    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2333    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2334    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2335 
2336    Notes:
2337    The number of rows and columns must be divisible by blocksize.
2338    This matrix type does not support complex Hermitian operation.
2339 
2340    The user MUST specify either the local or global matrix dimensions
2341    (possibly both).
2342 
2343    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344    than it must be used on all processors that share the object for that argument.
2345 
2346    If the *_nnz parameter is given then the *_nz parameter is ignored
2347 
2348    Storage Information:
2349    For a square global matrix we define each processor's diagonal portion
2350    to be its local rows and the corresponding columns (a square submatrix);
2351    each processor's off-diagonal portion encompasses the remainder of the
2352    local matrix (a rectangular submatrix).
2353 
2354    The user can specify preallocated storage for the diagonal part of
2355    the local submatrix with either d_nz or d_nnz (not both). Set
2356    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2357    memory allocation. Likewise, specify preallocated storage for the
2358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2359 
2360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361    the figure below we depict these three local rows and all columns (0-11).
2362 
2363 .vb
2364            0 1 2 3 4 5 6 7 8 9 10 11
2365           --------------------------
2366    row 3  |. . . d d d o o o o  o  o
2367    row 4  |. . . d d d o o o o  o  o
2368    row 5  |. . . d d d o o o o  o  o
2369           --------------------------
2370 .ve
2371 
2372    Thus, any entries in the d locations are stored in the d (diagonal)
2373    submatrix, and any entries in the o locations are stored in the
2374    o (off-diagonal) submatrix. Note that the d matrix is stored in
2375    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2376 
2377    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2378    plus the diagonal part of the d matrix,
2379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2382    or you will get TERRIBLE performance; see the users' manual chapter on
2383    matrices.
2384 
2385    Level: intermediate
2386 
2387 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2388 @*/
2389 
2390 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2391 {
2392   PetscMPIInt size;
2393 
2394   PetscFunctionBegin;
2395   PetscCall(MatCreate(comm, A));
2396   PetscCall(MatSetSizes(*A, m, n, M, N));
2397   PetscCallMPI(MPI_Comm_size(comm, &size));
2398   if (size > 1) {
2399     PetscCall(MatSetType(*A, MATMPISBAIJ));
2400     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2401   } else {
2402     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2403     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2404   }
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2409 {
2410   Mat           mat;
2411   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2412   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2413   PetscScalar  *array;
2414 
2415   PetscFunctionBegin;
2416   *newmat = NULL;
2417 
2418   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2419   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2420   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2421   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2422   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2423 
2424   mat->factortype   = matin->factortype;
2425   mat->preallocated = PETSC_TRUE;
2426   mat->assembled    = PETSC_TRUE;
2427   mat->insertmode   = NOT_SET_VALUES;
2428 
2429   a      = (Mat_MPISBAIJ *)mat->data;
2430   a->bs2 = oldmat->bs2;
2431   a->mbs = oldmat->mbs;
2432   a->nbs = oldmat->nbs;
2433   a->Mbs = oldmat->Mbs;
2434   a->Nbs = oldmat->Nbs;
2435 
2436   a->size         = oldmat->size;
2437   a->rank         = oldmat->rank;
2438   a->donotstash   = oldmat->donotstash;
2439   a->roworiented  = oldmat->roworiented;
2440   a->rowindices   = NULL;
2441   a->rowvalues    = NULL;
2442   a->getrowactive = PETSC_FALSE;
2443   a->barray       = NULL;
2444   a->rstartbs     = oldmat->rstartbs;
2445   a->rendbs       = oldmat->rendbs;
2446   a->cstartbs     = oldmat->cstartbs;
2447   a->cendbs       = oldmat->cendbs;
2448 
2449   /* hash table stuff */
2450   a->ht           = NULL;
2451   a->hd           = NULL;
2452   a->ht_size      = 0;
2453   a->ht_flag      = oldmat->ht_flag;
2454   a->ht_fact      = oldmat->ht_fact;
2455   a->ht_total_ct  = 0;
2456   a->ht_insert_ct = 0;
2457 
2458   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2459   if (oldmat->colmap) {
2460 #if defined(PETSC_USE_CTABLE)
2461     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2462 #else
2463     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2464     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2465 #endif
2466   } else a->colmap = NULL;
2467 
2468   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2469     PetscCall(PetscMalloc1(len, &a->garray));
2470     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2471   } else a->garray = NULL;
2472 
2473   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2474   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2475   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2476 
2477   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2478   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2479 
2480   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2481   PetscCall(VecGetArray(a->slvec1, &array));
2482   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2483   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2484   PetscCall(VecRestoreArray(a->slvec1, &array));
2485   PetscCall(VecGetArray(a->slvec0, &array));
2486   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2487   PetscCall(VecRestoreArray(a->slvec0, &array));
2488 
2489   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2490   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2491   a->sMvctx = oldmat->sMvctx;
2492 
2493   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2494   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2495   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2496   *newmat = mat;
2497   PetscFunctionReturn(PETSC_SUCCESS);
2498 }
2499 
2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2502 
2503 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2504 {
2505   PetscBool isbinary;
2506 
2507   PetscFunctionBegin;
2508   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2509   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2510   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2511   PetscFunctionReturn(PETSC_SUCCESS);
2512 }
2513 
2514 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2515 {
2516   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2517   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2518   PetscReal     atmp;
2519   PetscReal    *work, *svalues, *rvalues;
2520   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2521   PetscMPIInt   rank, size;
2522   PetscInt     *rowners_bs, dest, count, source;
2523   PetscScalar  *va;
2524   MatScalar    *ba;
2525   MPI_Status    stat;
2526 
2527   PetscFunctionBegin;
2528   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2529   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2530   PetscCall(VecGetArray(v, &va));
2531 
2532   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2533   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2534 
2535   bs  = A->rmap->bs;
2536   mbs = a->mbs;
2537   Mbs = a->Mbs;
2538   ba  = b->a;
2539   bi  = b->i;
2540   bj  = b->j;
2541 
2542   /* find ownerships */
2543   rowners_bs = A->rmap->range;
2544 
2545   /* each proc creates an array to be distributed */
2546   PetscCall(PetscCalloc1(bs * Mbs, &work));
2547 
2548   /* row_max for B */
2549   if (rank != size - 1) {
2550     for (i = 0; i < mbs; i++) {
2551       ncols = bi[1] - bi[0];
2552       bi++;
2553       brow = bs * i;
2554       for (j = 0; j < ncols; j++) {
2555         bcol = bs * (*bj);
2556         for (kcol = 0; kcol < bs; kcol++) {
2557           col = bcol + kcol;           /* local col index */
2558           col += rowners_bs[rank + 1]; /* global col index */
2559           for (krow = 0; krow < bs; krow++) {
2560             atmp = PetscAbsScalar(*ba);
2561             ba++;
2562             row = brow + krow; /* local row index */
2563             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2564             if (work[col] < atmp) work[col] = atmp;
2565           }
2566         }
2567         bj++;
2568       }
2569     }
2570 
2571     /* send values to its owners */
2572     for (dest = rank + 1; dest < size; dest++) {
2573       svalues = work + rowners_bs[dest];
2574       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2575       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2576     }
2577   }
2578 
2579   /* receive values */
2580   if (rank) {
2581     rvalues = work;
2582     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2583     for (source = 0; source < rank; source++) {
2584       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2585       /* process values */
2586       for (i = 0; i < count; i++) {
2587         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2588       }
2589     }
2590   }
2591 
2592   PetscCall(VecRestoreArray(v, &va));
2593   PetscCall(PetscFree(work));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2598 {
2599   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2600   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2601   PetscScalar       *x, *ptr, *from;
2602   Vec                bb1;
2603   const PetscScalar *b;
2604 
2605   PetscFunctionBegin;
2606   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2607   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2608 
2609   if (flag == SOR_APPLY_UPPER) {
2610     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2611     PetscFunctionReturn(PETSC_SUCCESS);
2612   }
2613 
2614   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2615     if (flag & SOR_ZERO_INITIAL_GUESS) {
2616       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2617       its--;
2618     }
2619 
2620     PetscCall(VecDuplicate(bb, &bb1));
2621     while (its--) {
2622       /* lower triangular part: slvec0b = - B^T*xx */
2623       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2624 
2625       /* copy xx into slvec0a */
2626       PetscCall(VecGetArray(mat->slvec0, &ptr));
2627       PetscCall(VecGetArray(xx, &x));
2628       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2629       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2630 
2631       PetscCall(VecScale(mat->slvec0, -1.0));
2632 
2633       /* copy bb into slvec1a */
2634       PetscCall(VecGetArray(mat->slvec1, &ptr));
2635       PetscCall(VecGetArrayRead(bb, &b));
2636       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2637       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2638 
2639       /* set slvec1b = 0 */
2640       PetscCall(VecSet(mat->slvec1b, 0.0));
2641 
2642       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2643       PetscCall(VecRestoreArray(xx, &x));
2644       PetscCall(VecRestoreArrayRead(bb, &b));
2645       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2646 
2647       /* upper triangular part: bb1 = bb1 - B*x */
2648       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2649 
2650       /* local diagonal sweep */
2651       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2652     }
2653     PetscCall(VecDestroy(&bb1));
2654   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2655     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2656   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2657     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2658   } else if (flag & SOR_EISENSTAT) {
2659     Vec                xx1;
2660     PetscBool          hasop;
2661     const PetscScalar *diag;
2662     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2663     PetscInt           i, n;
2664 
2665     if (!mat->xx1) {
2666       PetscCall(VecDuplicate(bb, &mat->xx1));
2667       PetscCall(VecDuplicate(bb, &mat->bb1));
2668     }
2669     xx1 = mat->xx1;
2670     bb1 = mat->bb1;
2671 
2672     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2673 
2674     if (!mat->diag) {
2675       /* this is wrong for same matrix with new nonzero values */
2676       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2677       PetscCall(MatGetDiagonal(matin, mat->diag));
2678     }
2679     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2680 
2681     if (hasop) {
2682       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2683       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2684     } else {
2685       /*
2686           These two lines are replaced by code that may be a bit faster for a good compiler
2687       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2688       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2689       */
2690       PetscCall(VecGetArray(mat->slvec1a, &sl));
2691       PetscCall(VecGetArrayRead(mat->diag, &diag));
2692       PetscCall(VecGetArrayRead(bb, &b));
2693       PetscCall(VecGetArray(xx, &x));
2694       PetscCall(VecGetLocalSize(xx, &n));
2695       if (omega == 1.0) {
2696         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2697         PetscCall(PetscLogFlops(2.0 * n));
2698       } else {
2699         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2700         PetscCall(PetscLogFlops(3.0 * n));
2701       }
2702       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2703       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2704       PetscCall(VecRestoreArrayRead(bb, &b));
2705       PetscCall(VecRestoreArray(xx, &x));
2706     }
2707 
2708     /* multiply off-diagonal portion of matrix */
2709     PetscCall(VecSet(mat->slvec1b, 0.0));
2710     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2711     PetscCall(VecGetArray(mat->slvec0, &from));
2712     PetscCall(VecGetArray(xx, &x));
2713     PetscCall(PetscArraycpy(from, x, bs * mbs));
2714     PetscCall(VecRestoreArray(mat->slvec0, &from));
2715     PetscCall(VecRestoreArray(xx, &x));
2716     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2717     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2718     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2719 
2720     /* local sweep */
2721     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2722     PetscCall(VecAXPY(xx, 1.0, xx1));
2723   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2724   PetscFunctionReturn(PETSC_SUCCESS);
2725 }
2726 
2727 /*@
2728      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2729          CSR format the local rows.
2730 
2731    Collective
2732 
2733    Input Parameters:
2734 +  comm - MPI communicator
2735 .  bs - the block size, only a block size of 1 is supported
2736 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2737 .  n - This value should be the same as the local size used in creating the
2738        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2739        calculated if N is given) For square matrices n is almost always m.
2740 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2741 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2742 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2743 .   j - column indices
2744 -   a - matrix values
2745 
2746    Output Parameter:
2747 .   mat - the matrix
2748 
2749    Level: intermediate
2750 
2751    Notes:
2752        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2753      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2754      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2755 
2756        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2757 
2758 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2759           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2760 @*/
2761 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2762 {
2763   PetscFunctionBegin;
2764   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2765   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2766   PetscCall(MatCreate(comm, mat));
2767   PetscCall(MatSetSizes(*mat, m, n, M, N));
2768   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2769   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2770   PetscFunctionReturn(PETSC_SUCCESS);
2771 }
2772 
2773 /*@C
2774    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2775 
2776    Collective
2777 
2778    Input Parameters:
2779 +  B - the matrix
2780 .  bs - the block size
2781 .  i - the indices into j for the start of each local row (starts with zero)
2782 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2783 -  v - optional values in the matrix
2784 
2785    Level: advanced
2786 
2787    Notes:
2788    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2789    and usually the numerical values as well
2790 
2791    Any entries below the diagonal are ignored
2792 
2793 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2794 @*/
2795 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2796 {
2797   PetscFunctionBegin;
2798   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2799   PetscFunctionReturn(PETSC_SUCCESS);
2800 }
2801 
2802 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2803 {
2804   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2805   PetscInt    *indx;
2806   PetscScalar *values;
2807 
2808   PetscFunctionBegin;
2809   PetscCall(MatGetSize(inmat, &m, &N));
2810   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2811     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2812     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2813     PetscInt     *bindx, rmax = a->rmax, j;
2814     PetscMPIInt   rank, size;
2815 
2816     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2817     mbs = m / bs;
2818     Nbs = N / cbs;
2819     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2820     nbs = n / cbs;
2821 
2822     PetscCall(PetscMalloc1(rmax, &bindx));
2823     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2824 
2825     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2826     PetscCallMPI(MPI_Comm_rank(comm, &size));
2827     if (rank == size - 1) {
2828       /* Check sum(nbs) = Nbs */
2829       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2830     }
2831 
2832     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2833     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2834     for (i = 0; i < mbs; i++) {
2835       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2836       nnz = nnz / bs;
2837       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2838       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2839       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2840     }
2841     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2842     PetscCall(PetscFree(bindx));
2843 
2844     PetscCall(MatCreate(comm, outmat));
2845     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2846     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2847     PetscCall(MatSetType(*outmat, MATSBAIJ));
2848     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2849     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2850     MatPreallocateEnd(dnz, onz);
2851   }
2852 
2853   /* numeric phase */
2854   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2855   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2856 
2857   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2858   for (i = 0; i < m; i++) {
2859     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2860     Ii = i + rstart;
2861     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2862     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2863   }
2864   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2865   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2866   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2867   PetscFunctionReturn(PETSC_SUCCESS);
2868 }
2869