xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 30bab8dbce35fbb76248bd0fea1b2bc98f8a4f04)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11 #if defined(PETSC_USE_LOG)
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 #endif
14   PetscCall(MatStashDestroy_Private(&mat->stash));
15   PetscCall(MatStashDestroy_Private(&mat->bstash));
16   PetscCall(MatDestroy(&baij->A));
17   PetscCall(MatDestroy(&baij->B));
18 #if defined(PETSC_USE_CTABLE)
19   PetscCall(PetscHMapIDestroy(&baij->colmap));
20 #else
21   PetscCall(PetscFree(baij->colmap));
22 #endif
23   PetscCall(PetscFree(baij->garray));
24   PetscCall(VecDestroy(&baij->lvec));
25   PetscCall(VecScatterDestroy(&baij->Mvctx));
26   PetscCall(VecDestroy(&baij->slvec0));
27   PetscCall(VecDestroy(&baij->slvec0b));
28   PetscCall(VecDestroy(&baij->slvec1));
29   PetscCall(VecDestroy(&baij->slvec1a));
30   PetscCall(VecDestroy(&baij->slvec1b));
31   PetscCall(VecScatterDestroy(&baij->sMvctx));
32   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33   PetscCall(PetscFree(baij->barray));
34   PetscCall(PetscFree(baij->hd));
35   PetscCall(VecDestroy(&baij->diag));
36   PetscCall(VecDestroy(&baij->bb1));
37   PetscCall(VecDestroy(&baij->xx1));
38 #if defined(PETSC_USE_REAL_MAT_SINGLE)
39   PetscCall(PetscFree(baij->setvaluescopy));
40 #endif
41   PetscCall(PetscFree(baij->in_loc));
42   PetscCall(PetscFree(baij->v_loc));
43   PetscCall(PetscFree(baij->rangebs));
44   PetscCall(PetscFree(mat->data));
45 
46   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51 #if defined(PETSC_HAVE_ELEMENTAL)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53 #endif
54 #if defined(PETSC_HAVE_SCALAPACK)
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56 #endif
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63 #define TYPE SBAIJ
64 #define TYPE_SBAIJ
65 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66 #undef TYPE
67 #undef TYPE_SBAIJ
68 
69 #if defined(PETSC_HAVE_ELEMENTAL)
70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71 #endif
72 #if defined(PETSC_HAVE_SCALAPACK)
73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74 #endif
75 
76 /* This could be moved to matimpl.h */
77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78 {
79   Mat       preallocator;
80   PetscInt  r, rstart, rend;
81   PetscInt  bs, i, m, n, M, N;
82   PetscBool cong = PETSC_TRUE;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86   PetscValidLogicalCollectiveInt(B, nm, 2);
87   for (i = 0; i < nm; i++) {
88     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
89     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91   }
92   PetscValidLogicalCollectiveBool(B, fill, 5);
93   PetscCall(MatGetBlockSize(B, &bs));
94   PetscCall(MatGetSize(B, &M, &N));
95   PetscCall(MatGetLocalSize(B, &m, &n));
96   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98   PetscCall(MatSetBlockSize(preallocator, bs));
99   PetscCall(MatSetSizes(preallocator, m, n, M, N));
100   PetscCall(MatSetUp(preallocator));
101   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102   for (r = rstart; r < rend; ++r) {
103     PetscInt           ncols;
104     const PetscInt    *row;
105     const PetscScalar *vals;
106 
107     for (i = 0; i < nm; i++) {
108       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
111       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112     }
113   }
114   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
117   PetscCall(MatDestroy(&preallocator));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122 {
123   Mat      B;
124   PetscInt r;
125 
126   PetscFunctionBegin;
127   if (reuse != MAT_REUSE_MATRIX) {
128     PetscBool symm = PETSC_TRUE, isdense;
129     PetscInt  bs;
130 
131     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133     PetscCall(MatSetType(B, newtype));
134     PetscCall(MatGetBlockSize(A, &bs));
135     PetscCall(MatSetBlockSize(B, bs));
136     PetscCall(PetscLayoutSetUp(B->rmap));
137     PetscCall(PetscLayoutSetUp(B->cmap));
138     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139     if (!isdense) {
140       PetscCall(MatGetRowUpperTriangular(A));
141       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142       PetscCall(MatRestoreRowUpperTriangular(A));
143     } else {
144       PetscCall(MatSetUp(B));
145     }
146   } else {
147     B = *newmat;
148     PetscCall(MatZeroEntries(B));
149   }
150 
151   PetscCall(MatGetRowUpperTriangular(A));
152   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153     PetscInt           ncols;
154     const PetscInt    *row;
155     const PetscScalar *vals;
156 
157     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159 #if defined(PETSC_USE_COMPLEX)
160     if (A->hermitian == PETSC_BOOL3_TRUE) {
161       PetscInt i;
162       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163     } else {
164       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165     }
166 #else
167     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168 #endif
169     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170   }
171   PetscCall(MatRestoreRowUpperTriangular(A));
172   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174 
175   if (reuse == MAT_INPLACE_MATRIX) {
176     PetscCall(MatHeaderReplace(A, &B));
177   } else {
178     *newmat = B;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184 {
185   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186 
187   PetscFunctionBegin;
188   PetscCall(MatStoreValues(aij->A));
189   PetscCall(MatStoreValues(aij->B));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194 {
195   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196 
197   PetscFunctionBegin;
198   PetscCall(MatRetrieveValues(aij->A));
199   PetscCall(MatRetrieveValues(aij->B));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204   { \
205     brow = row / bs; \
206     rp   = aj + ai[brow]; \
207     ap   = aa + bs2 * ai[brow]; \
208     rmax = aimax[brow]; \
209     nrow = ailen[brow]; \
210     bcol = col / bs; \
211     ridx = row % bs; \
212     cidx = col % bs; \
213     low  = 0; \
214     high = nrow; \
215     while (high - low > 3) { \
216       t = (low + high) / 2; \
217       if (rp[t] > bcol) high = t; \
218       else low = t; \
219     } \
220     for (_i = low; _i < high; _i++) { \
221       if (rp[_i] > bcol) break; \
222       if (rp[_i] == bcol) { \
223         bap = ap + bs2 * _i + bs * cidx + ridx; \
224         if (addv == ADD_VALUES) *bap += value; \
225         else *bap = value; \
226         goto a_noinsert; \
227       } \
228     } \
229     if (a->nonew == 1) goto a_noinsert; \
230     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232     N = nrow++ - 1; \
233     /* shift up all the later entries in this row */ \
234     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237     rp[_i]                          = bcol; \
238     ap[bs2 * _i + bs * cidx + ridx] = value; \
239     A->nonzerostate++; \
240   a_noinsert:; \
241     ailen[brow] = nrow; \
242   }
243 
244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245   { \
246     brow = row / bs; \
247     rp   = bj + bi[brow]; \
248     ap   = ba + bs2 * bi[brow]; \
249     rmax = bimax[brow]; \
250     nrow = bilen[brow]; \
251     bcol = col / bs; \
252     ridx = row % bs; \
253     cidx = col % bs; \
254     low  = 0; \
255     high = nrow; \
256     while (high - low > 3) { \
257       t = (low + high) / 2; \
258       if (rp[t] > bcol) high = t; \
259       else low = t; \
260     } \
261     for (_i = low; _i < high; _i++) { \
262       if (rp[_i] > bcol) break; \
263       if (rp[_i] == bcol) { \
264         bap = ap + bs2 * _i + bs * cidx + ridx; \
265         if (addv == ADD_VALUES) *bap += value; \
266         else *bap = value; \
267         goto b_noinsert; \
268       } \
269     } \
270     if (b->nonew == 1) goto b_noinsert; \
271     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273     N = nrow++ - 1; \
274     /* shift up all the later entries in this row */ \
275     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278     rp[_i]                          = bcol; \
279     ap[bs2 * _i + bs * cidx + ridx] = value; \
280     B->nonzerostate++; \
281   b_noinsert:; \
282     bilen[brow] = nrow; \
283   }
284 
285 /* Only add/insert a(i,j) with i<=j (blocks).
286    Any a(i,j) with i>j input by user is ignored or generates an error
287 */
288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289 {
290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291   MatScalar     value;
292   PetscBool     roworiented = baij->roworiented;
293   PetscInt      i, j, row, col;
294   PetscInt      rstart_orig = mat->rmap->rstart;
295   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297 
298   /* Some Variables required in the macro */
299   Mat           A     = baij->A;
300   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
301   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302   MatScalar    *aa = a->a;
303 
304   Mat          B     = baij->B;
305   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
306   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307   MatScalar   *ba = b->a;
308 
309   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
310   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311   MatScalar *ap, *bap;
312 
313   /* for stash */
314   PetscInt   n_loc, *in_loc = NULL;
315   MatScalar *v_loc = NULL;
316 
317   PetscFunctionBegin;
318   if (!baij->donotstash) {
319     if (n > baij->n_loc) {
320       PetscCall(PetscFree(baij->in_loc));
321       PetscCall(PetscFree(baij->v_loc));
322       PetscCall(PetscMalloc1(n, &baij->in_loc));
323       PetscCall(PetscMalloc1(n, &baij->v_loc));
324 
325       baij->n_loc = n;
326     }
327     in_loc = baij->in_loc;
328     v_loc  = baij->v_loc;
329   }
330 
331   for (i = 0; i < m; i++) {
332     if (im[i] < 0) continue;
333     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335       row = im[i] - rstart_orig;                     /* local row index */
336       for (j = 0; j < n; j++) {
337         if (im[i] / bs > in[j] / bs) {
338           if (a->ignore_ltriangular) {
339             continue; /* ignore lower triangular blocks */
340           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341         }
342         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343           col  = in[j] - cstart_orig;                    /* local col index */
344           brow = row / bs;
345           bcol = col / bs;
346           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347           if (roworiented) value = v[i * n + j];
348           else value = v[i + j * m];
349           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351         } else if (in[j] < 0) {
352           continue;
353         } else {
354           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355           /* off-diag entry (B) */
356           if (mat->was_assembled) {
357             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358 #if defined(PETSC_USE_CTABLE)
359             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360             col = col - 1;
361 #else
362             col = baij->colmap[in[j] / bs] - 1;
363 #endif
364             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
365               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366               col = in[j];
367               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368               B     = baij->B;
369               b     = (Mat_SeqBAIJ *)(B)->data;
370               bimax = b->imax;
371               bi    = b->i;
372               bilen = b->ilen;
373               bj    = b->j;
374               ba    = b->a;
375             } else col += in[j] % bs;
376           } else col = in[j];
377           if (roworiented) value = v[i * n + j];
378           else value = v[i + j * m];
379           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381         }
382       }
383     } else { /* off processor entry */
384       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385       if (!baij->donotstash) {
386         mat->assembled = PETSC_FALSE;
387         n_loc          = 0;
388         for (j = 0; j < n; j++) {
389           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390           in_loc[n_loc] = in[j];
391           if (roworiented) {
392             v_loc[n_loc] = v[i * n + j];
393           } else {
394             v_loc[n_loc] = v[j * m + i];
395           }
396           n_loc++;
397         }
398         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399       }
400     }
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406 {
407   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
408   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411   PetscBool          roworiented = a->roworiented;
412   const PetscScalar *value       = v;
413   MatScalar         *ap, *aa = a->a, *bap;
414 
415   PetscFunctionBegin;
416   if (col < row) {
417     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419   }
420   rp    = aj + ai[row];
421   ap    = aa + bs2 * ai[row];
422   rmax  = imax[row];
423   nrow  = ailen[row];
424   value = v;
425   low   = 0;
426   high  = nrow;
427 
428   while (high - low > 7) {
429     t = (low + high) / 2;
430     if (rp[t] > col) high = t;
431     else low = t;
432   }
433   for (i = low; i < high; i++) {
434     if (rp[i] > col) break;
435     if (rp[i] == col) {
436       bap = ap + bs2 * i;
437       if (roworiented) {
438         if (is == ADD_VALUES) {
439           for (ii = 0; ii < bs; ii++) {
440             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441           }
442         } else {
443           for (ii = 0; ii < bs; ii++) {
444             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445           }
446         }
447       } else {
448         if (is == ADD_VALUES) {
449           for (ii = 0; ii < bs; ii++) {
450             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451           }
452         } else {
453           for (ii = 0; ii < bs; ii++) {
454             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455           }
456         }
457       }
458       goto noinsert2;
459     }
460   }
461   if (nonew == 1) goto noinsert2;
462   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464   N = nrow++ - 1;
465   high++;
466   /* shift up all the later entries in this row */
467   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469   rp[i] = col;
470   bap   = ap + bs2 * i;
471   if (roworiented) {
472     for (ii = 0; ii < bs; ii++) {
473       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474     }
475   } else {
476     for (ii = 0; ii < bs; ii++) {
477       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478     }
479   }
480 noinsert2:;
481   ailen[row] = nrow;
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 /*
486    This routine is exactly duplicated in mpibaij.c
487 */
488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
491   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494   PetscBool          roworiented = a->roworiented;
495   const PetscScalar *value       = v;
496   MatScalar         *ap, *aa = a->a, *bap;
497 
498   PetscFunctionBegin;
499   rp    = aj + ai[row];
500   ap    = aa + bs2 * ai[row];
501   rmax  = imax[row];
502   nrow  = ailen[row];
503   low   = 0;
504   high  = nrow;
505   value = v;
506   while (high - low > 7) {
507     t = (low + high) / 2;
508     if (rp[t] > col) high = t;
509     else low = t;
510   }
511   for (i = low; i < high; i++) {
512     if (rp[i] > col) break;
513     if (rp[i] == col) {
514       bap = ap + bs2 * i;
515       if (roworiented) {
516         if (is == ADD_VALUES) {
517           for (ii = 0; ii < bs; ii++) {
518             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519           }
520         } else {
521           for (ii = 0; ii < bs; ii++) {
522             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523           }
524         }
525       } else {
526         if (is == ADD_VALUES) {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529             bap += bs;
530           }
531         } else {
532           for (ii = 0; ii < bs; ii++, value += bs) {
533             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534             bap += bs;
535           }
536         }
537       }
538       goto noinsert2;
539     }
540   }
541   if (nonew == 1) goto noinsert2;
542   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544   N = nrow++ - 1;
545   high++;
546   /* shift up all the later entries in this row */
547   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549   rp[i] = col;
550   bap   = ap + bs2 * i;
551   if (roworiented) {
552     for (ii = 0; ii < bs; ii++) {
553       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554     }
555   } else {
556     for (ii = 0; ii < bs; ii++) {
557       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558     }
559   }
560 noinsert2:;
561   ailen[row] = nrow;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 /*
566     This routine could be optimized by removing the need for the block copy below and passing stride information
567   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568 */
569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570 {
571   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572   const MatScalar *value;
573   MatScalar       *barray      = baij->barray;
574   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
578 
579   PetscFunctionBegin;
580   if (!barray) {
581     PetscCall(PetscMalloc1(bs2, &barray));
582     baij->barray = barray;
583   }
584 
585   if (roworiented) {
586     stepval = (n - 1) * bs;
587   } else {
588     stepval = (m - 1) * bs;
589   }
590   for (i = 0; i < m; i++) {
591     if (im[i] < 0) continue;
592     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593     if (im[i] >= rstart && im[i] < rend) {
594       row = im[i] - rstart;
595       for (j = 0; j < n; j++) {
596         if (im[i] > in[j]) {
597           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599         }
600         /* If NumCol = 1 then a copy is not required */
601         if ((roworiented) && (n == 1)) {
602           barray = (MatScalar *)v + i * bs2;
603         } else if ((!roworiented) && (m == 1)) {
604           barray = (MatScalar *)v + j * bs2;
605         } else { /* Here a copy is required */
606           if (roworiented) {
607             value = v + i * (stepval + bs) * bs + j * bs;
608           } else {
609             value = v + j * (stepval + bs) * bs + i * bs;
610           }
611           for (ii = 0; ii < bs; ii++, value += stepval) {
612             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613           }
614           barray -= bs2;
615         }
616 
617         if (in[j] >= cstart && in[j] < cend) {
618           col = in[j] - cstart;
619           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620         } else if (in[j] < 0) {
621           continue;
622         } else {
623           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624           if (mat->was_assembled) {
625             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
626 
627 #if defined(PETSC_USE_DEBUG)
628   #if defined(PETSC_USE_CTABLE)
629             {
630               PetscInt data;
631               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633             }
634   #else
635             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636   #endif
637 #endif
638 #if defined(PETSC_USE_CTABLE)
639             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640             col = (col - 1) / bs;
641 #else
642             col = (baij->colmap[in[j]] - 1) / bs;
643 #endif
644             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645               PetscCall(MatDisAssemble_MPISBAIJ(mat));
646               col = in[j];
647             }
648           } else col = in[j];
649           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650         }
651       }
652     } else {
653       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654       if (!baij->donotstash) {
655         if (roworiented) {
656           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         } else {
658           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659         }
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667 {
668   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671 
672   PetscFunctionBegin;
673   for (i = 0; i < m; i++) {
674     if (idxm[i] < 0) continue; /* negative row */
675     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677       row = idxm[i] - bsrstart;
678       for (j = 0; j < n; j++) {
679         if (idxn[j] < 0) continue; /* negative column */
680         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682           col = idxn[j] - bscstart;
683           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684         } else {
685           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686 #if defined(PETSC_USE_CTABLE)
687           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688           data--;
689 #else
690           data = baij->colmap[idxn[j] / bs] - 1;
691 #endif
692           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693           else {
694             col = data + idxn[j] % bs;
695             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696           }
697         }
698       }
699     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705 {
706   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707   PetscReal     sum[2], *lnorm2;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, norm));
712   } else {
713     if (type == NORM_FROBENIUS) {
714       PetscCall(PetscMalloc1(2, &lnorm2));
715       PetscCall(MatNorm(baij->A, type, lnorm2));
716       *lnorm2 = (*lnorm2) * (*lnorm2);
717       lnorm2++; /* squar power of norm(A) */
718       PetscCall(MatNorm(baij->B, type, lnorm2));
719       *lnorm2 = (*lnorm2) * (*lnorm2);
720       lnorm2--; /* squar power of norm(B) */
721       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
723       PetscCall(PetscFree(lnorm2));
724     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
725       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
727       PetscReal    *rsum, *rsum2, vabs;
728       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730       MatScalar    *v;
731 
732       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734       /* Amat */
735       v  = amat->a;
736       jj = amat->j;
737       for (brow = 0; brow < mbs; brow++) {
738         grow = bs * (rstart + brow);
739         nz   = amat->i[brow + 1] - amat->i[brow];
740         for (bcol = 0; bcol < nz; bcol++) {
741           gcol = bs * (rstart + *jj);
742           jj++;
743           for (col = 0; col < bs; col++) {
744             for (row = 0; row < bs; row++) {
745               vabs = PetscAbsScalar(*v);
746               v++;
747               rsum[gcol + col] += vabs;
748               /* non-diagonal block */
749               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
750             }
751           }
752         }
753         PetscCall(PetscLogFlops(nz * bs * bs));
754       }
755       /* Bmat */
756       v  = bmat->a;
757       jj = bmat->j;
758       for (brow = 0; brow < mbs; brow++) {
759         grow = bs * (rstart + brow);
760         nz   = bmat->i[brow + 1] - bmat->i[brow];
761         for (bcol = 0; bcol < nz; bcol++) {
762           gcol = bs * garray[*jj];
763           jj++;
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               vabs = PetscAbsScalar(*v);
767               v++;
768               rsum[gcol + col] += vabs;
769               rsum[grow + row] += vabs;
770             }
771           }
772         }
773         PetscCall(PetscLogFlops(nz * bs * bs));
774       }
775       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776       *norm = 0.0;
777       for (col = 0; col < mat->cmap->N; col++) {
778         if (rsum2[col] > *norm) *norm = rsum2[col];
779       }
780       PetscCall(PetscFree2(rsum, rsum2));
781     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782   }
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   PetscInt      nstash, reallocs;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793 
794   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
799   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800   PetscFunctionReturn(PETSC_SUCCESS);
801 }
802 
803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804 {
805   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
807   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808   PetscInt     *row, *col;
809   PetscBool     other_disassembled;
810   PetscMPIInt   n;
811   PetscBool     r1, r2, r3;
812   MatScalar    *val;
813 
814   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815   PetscFunctionBegin;
816   if (!baij->donotstash && !mat->nooffprocentries) {
817     while (1) {
818       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819       if (!flg) break;
820 
821       for (i = 0; i < n;) {
822         /* Now identify the consecutive vals belonging to the same row */
823         for (j = i, rstart = row[j]; j < n; j++) {
824           if (row[j] != rstart) break;
825         }
826         if (j < n) ncols = j - i;
827         else ncols = n - i;
828         /* Now assemble all these values with a single function call */
829         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830         i = j;
831       }
832     }
833     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834     /* Now process the block-stash. Since the values are stashed column-oriented,
835        set the roworiented flag to column oriented, and after MatSetValues()
836        restore the original flags */
837     r1 = baij->roworiented;
838     r2 = a->roworiented;
839     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
840 
841     baij->roworiented = PETSC_FALSE;
842     a->roworiented    = PETSC_FALSE;
843 
844     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845     while (1) {
846       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847       if (!flg) break;
848 
849       for (i = 0; i < n;) {
850         /* Now identify the consecutive vals belonging to the same row */
851         for (j = i, rstart = row[j]; j < n; j++) {
852           if (row[j] != rstart) break;
853         }
854         if (j < n) ncols = j - i;
855         else ncols = n - i;
856         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857         i = j;
858       }
859     }
860     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
861 
862     baij->roworiented = r1;
863     a->roworiented    = r2;
864 
865     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866   }
867 
868   PetscCall(MatAssemblyBegin(baij->A, mode));
869   PetscCall(MatAssemblyEnd(baij->A, mode));
870 
871   /* determine if any processor has disassembled, if so we must
872      also disassemble ourselves, in order that we may reassemble. */
873   /*
874      if nonzero structure of submatrix B cannot change then we know that
875      no processor disassembled thus we can skip this stuff
876   */
877   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
878     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
879     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880   }
881 
882   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
883   PetscCall(MatAssemblyBegin(baij->B, mode));
884   PetscCall(MatAssemblyEnd(baij->B, mode));
885 
886   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
887 
888   baij->rowvalues = NULL;
889 
890   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
891   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
893     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899 #include <petscdraw.h>
900 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901 {
902   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903   PetscInt          bs   = mat->rmap->bs;
904   PetscMPIInt       rank = baij->rank;
905   PetscBool         iascii, isdraw;
906   PetscViewer       sviewer;
907   PetscViewerFormat format;
908 
909   PetscFunctionBegin;
910   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
911   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
912   if (iascii) {
913     PetscCall(PetscViewerGetFormat(viewer, &format));
914     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915       MatInfo info;
916       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
917       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
918       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
919       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
920                                                    mat->rmap->bs, (double)info.memory));
921       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
924       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
925       PetscCall(PetscViewerFlush(viewer));
926       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
927       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
928       PetscCall(VecScatterView(baij->Mvctx, viewer));
929       PetscFunctionReturn(PETSC_SUCCESS);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
932       PetscFunctionReturn(PETSC_SUCCESS);
933     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
934       PetscFunctionReturn(PETSC_SUCCESS);
935     }
936   }
937 
938   if (isdraw) {
939     PetscDraw draw;
940     PetscBool isnull;
941     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
942     PetscCall(PetscDrawIsNull(draw, &isnull));
943     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944   }
945 
946   {
947     /* assemble the entire matrix onto first processor. */
948     Mat           A;
949     Mat_SeqSBAIJ *Aloc;
950     Mat_SeqBAIJ  *Bloc;
951     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952     MatScalar    *a;
953     const char   *matname;
954 
955     /* Should this be the same type as mat? */
956     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957     if (rank == 0) {
958       PetscCall(MatSetSizes(A, M, N, M, N));
959     } else {
960       PetscCall(MatSetSizes(A, 0, 0, M, N));
961     }
962     PetscCall(MatSetType(A, MATMPISBAIJ));
963     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
964     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
968     ai   = Aloc->i;
969     aj   = Aloc->j;
970     a    = Aloc->a;
971     PetscCall(PetscMalloc1(bs, &rvals));
972 
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = (baij->cstartbs + aj[j]) * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     /* copy over the B part */
986     Bloc = (Mat_SeqBAIJ *)baij->B->data;
987     ai   = Bloc->i;
988     aj   = Bloc->j;
989     a    = Bloc->a;
990     for (i = 0; i < mbs; i++) {
991       rvals[0] = bs * (baij->rstartbs + i);
992       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993       for (j = ai[i]; j < ai[i + 1]; j++) {
994         col = baij->garray[aj[j]] * bs;
995         for (k = 0; k < bs; k++) {
996           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
997           col++;
998           a += bs;
999         }
1000       }
1001     }
1002     PetscCall(PetscFree(rvals));
1003     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1004     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005     /*
1006        Everyone has to call to draw the matrix since the graphics waits are
1007        synchronized across all processors that share the PetscDraw object
1008     */
1009     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1010     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011     if (rank == 0) {
1012       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1013       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014     }
1015     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1016     PetscCall(PetscViewerFlush(viewer));
1017     PetscCall(MatDestroy(&A));
1018   }
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024 
1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026 {
1027   PetscBool iascii, isdraw, issocket, isbinary;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034   if (iascii || isdraw || issocket) {
1035     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041 {
1042   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1044   PetscScalar       *from;
1045   const PetscScalar *x;
1046 
1047   PetscFunctionBegin;
1048   /* diagonal part */
1049   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050   PetscCall(VecSet(a->slvec1b, 0.0));
1051 
1052   /* subdiagonal part */
1053   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055 
1056   /* copy x into the vec slvec0 */
1057   PetscCall(VecGetArray(a->slvec0, &from));
1058   PetscCall(VecGetArrayRead(xx, &x));
1059 
1060   PetscCall(PetscArraycpy(from, x, bs * mbs));
1061   PetscCall(VecRestoreArray(a->slvec0, &from));
1062   PetscCall(VecRestoreArrayRead(xx, &x));
1063 
1064   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066   /* supperdiagonal part */
1067   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075   PetscScalar       *from;
1076   const PetscScalar *x;
1077 
1078   PetscFunctionBegin;
1079   /* diagonal part */
1080   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081   PetscCall(VecSet(a->slvec1b, 0.0));
1082 
1083   /* subdiagonal part */
1084   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1085 
1086   /* copy x into the vec slvec0 */
1087   PetscCall(VecGetArray(a->slvec0, &from));
1088   PetscCall(VecGetArrayRead(xx, &x));
1089 
1090   PetscCall(PetscArraycpy(from, x, bs * mbs));
1091   PetscCall(VecRestoreArray(a->slvec0, &from));
1092   PetscCall(VecRestoreArrayRead(xx, &x));
1093 
1094   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1095   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096   /* supperdiagonal part */
1097   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1102 {
1103   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1104   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1105   PetscScalar       *from, zero       = 0.0;
1106   const PetscScalar *x;
1107 
1108   PetscFunctionBegin;
1109   /* diagonal part */
1110   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1111   PetscCall(VecSet(a->slvec1b, zero));
1112 
1113   /* subdiagonal part */
1114   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1115   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1116 
1117   /* copy x into the vec slvec0 */
1118   PetscCall(VecGetArray(a->slvec0, &from));
1119   PetscCall(VecGetArrayRead(xx, &x));
1120   PetscCall(PetscArraycpy(from, x, bs * mbs));
1121   PetscCall(VecRestoreArray(a->slvec0, &from));
1122 
1123   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1124   PetscCall(VecRestoreArrayRead(xx, &x));
1125   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1126 
1127   /* supperdiagonal part */
1128   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133 {
1134   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1135   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1136   PetscScalar       *from, zero       = 0.0;
1137   const PetscScalar *x;
1138 
1139   PetscFunctionBegin;
1140   /* diagonal part */
1141   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1142   PetscCall(VecSet(a->slvec1b, zero));
1143 
1144   /* subdiagonal part */
1145   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1146 
1147   /* copy x into the vec slvec0 */
1148   PetscCall(VecGetArray(a->slvec0, &from));
1149   PetscCall(VecGetArrayRead(xx, &x));
1150   PetscCall(PetscArraycpy(from, x, bs * mbs));
1151   PetscCall(VecRestoreArray(a->slvec0, &from));
1152 
1153   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1154   PetscCall(VecRestoreArrayRead(xx, &x));
1155   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1156 
1157   /* supperdiagonal part */
1158   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1167 {
1168   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1172   PetscCall(MatGetDiagonal(a->A, v));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1177 {
1178   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1179 
1180   PetscFunctionBegin;
1181   PetscCall(MatScale(a->A, aa));
1182   PetscCall(MatScale(a->B, aa));
1183   PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185 
1186 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1187 {
1188   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1189   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1190   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1191   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1192   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1193 
1194   PetscFunctionBegin;
1195   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1196   mat->getrowactive = PETSC_TRUE;
1197 
1198   if (!mat->rowvalues && (idx || v)) {
1199     /*
1200         allocate enough space to hold information from the longest row.
1201     */
1202     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1203     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1204     PetscInt      max = 1, mbs = mat->mbs, tmp;
1205     for (i = 0; i < mbs; i++) {
1206       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1207       if (max < tmp) max = tmp;
1208     }
1209     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1210   }
1211 
1212   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1213   lrow = row - brstart; /* local row index */
1214 
1215   pvA = &vworkA;
1216   pcA = &cworkA;
1217   pvB = &vworkB;
1218   pcB = &cworkB;
1219   if (!v) {
1220     pvA = NULL;
1221     pvB = NULL;
1222   }
1223   if (!idx) {
1224     pcA = NULL;
1225     if (!v) pcB = NULL;
1226   }
1227   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1228   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1229   nztot = nzA + nzB;
1230 
1231   cmap = mat->garray;
1232   if (v || idx) {
1233     if (nztot) {
1234       /* Sort by increasing column numbers, assuming A and B already sorted */
1235       PetscInt imark = -1;
1236       if (v) {
1237         *v = v_p = mat->rowvalues;
1238         for (i = 0; i < nzB; i++) {
1239           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1240           else break;
1241         }
1242         imark = i;
1243         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1244         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1245       }
1246       if (idx) {
1247         *idx = idx_p = mat->rowindices;
1248         if (imark > -1) {
1249           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250         } else {
1251           for (i = 0; i < nzB; i++) {
1252             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253             else break;
1254           }
1255           imark = i;
1256         }
1257         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1258         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259       }
1260     } else {
1261       if (idx) *idx = NULL;
1262       if (v) *v = NULL;
1263     }
1264   }
1265   *nz = nztot;
1266   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1267   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1272 {
1273   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1274 
1275   PetscFunctionBegin;
1276   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1277   baij->getrowactive = PETSC_FALSE;
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_TRUE;
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1293   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1294 
1295   PetscFunctionBegin;
1296   aA->getrow_utriangular = PETSC_FALSE;
1297   PetscFunctionReturn(PETSC_SUCCESS);
1298 }
1299 
1300 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301 {
1302   PetscFunctionBegin;
1303   if (PetscDefined(USE_COMPLEX)) {
1304     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1305 
1306     PetscCall(MatConjugate(a->A));
1307     PetscCall(MatConjugate(a->B));
1308   }
1309   PetscFunctionReturn(PETSC_SUCCESS);
1310 }
1311 
1312 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1315 
1316   PetscFunctionBegin;
1317   PetscCall(MatRealPart(a->A));
1318   PetscCall(MatRealPart(a->B));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1325 
1326   PetscFunctionBegin;
1327   PetscCall(MatImaginaryPart(a->A));
1328   PetscCall(MatImaginaryPart(a->B));
1329   PetscFunctionReturn(PETSC_SUCCESS);
1330 }
1331 
1332 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333    Input: isrow       - distributed(parallel),
1334           iscol_local - locally owned (seq)
1335 */
1336 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1337 {
1338   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1339   const PetscInt *ptr1, *ptr2;
1340 
1341   PetscFunctionBegin;
1342   PetscCall(ISGetLocalSize(isrow, &sz1));
1343   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1344   if (sz1 > sz2) {
1345     *flg = PETSC_FALSE;
1346     PetscFunctionReturn(PETSC_SUCCESS);
1347   }
1348 
1349   PetscCall(ISGetIndices(isrow, &ptr1));
1350   PetscCall(ISGetIndices(iscol_local, &ptr2));
1351 
1352   PetscCall(PetscMalloc1(sz1, &a1));
1353   PetscCall(PetscMalloc1(sz2, &a2));
1354   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1355   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1356   PetscCall(PetscSortInt(sz1, a1));
1357   PetscCall(PetscSortInt(sz2, a2));
1358 
1359   nmatch = 0;
1360   k      = 0;
1361   for (i = 0; i < sz1; i++) {
1362     for (j = k; j < sz2; j++) {
1363       if (a1[i] == a2[j]) {
1364         k = j;
1365         nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow, &ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(PETSC_SUCCESS);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1383 {
1384   IS        iscol_local;
1385   PetscInt  csize;
1386   PetscBool isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol, &csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1392     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol, &iscol_local));
1397     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1422 {
1423   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1424   Mat            A = a->A, B = a->B;
1425   PetscLogDouble isend[5], irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1431 
1432   isend[0] = info->nz_used;
1433   isend[1] = info->nz_allocated;
1434   isend[2] = info->nz_unneeded;
1435   isend[3] = info->memory;
1436   isend[4] = info->mallocs;
1437 
1438   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1439 
1440   isend[0] += info->nz_used;
1441   isend[1] += info->nz_allocated;
1442   isend[2] += info->nz_unneeded;
1443   isend[3] += info->memory;
1444   isend[4] += info->mallocs;
1445   if (flag == MAT_LOCAL) {
1446     info->nz_used      = isend[0];
1447     info->nz_allocated = isend[1];
1448     info->nz_unneeded  = isend[2];
1449     info->memory       = isend[3];
1450     info->mallocs      = isend[4];
1451   } else if (flag == MAT_GLOBAL_MAX) {
1452     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1453 
1454     info->nz_used      = irecv[0];
1455     info->nz_allocated = irecv[1];
1456     info->nz_unneeded  = irecv[2];
1457     info->memory       = irecv[3];
1458     info->mallocs      = irecv[4];
1459   } else if (flag == MAT_GLOBAL_SUM) {
1460     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1461 
1462     info->nz_used      = irecv[0];
1463     info->nz_allocated = irecv[1];
1464     info->nz_unneeded  = irecv[2];
1465     info->memory       = irecv[3];
1466     info->mallocs      = irecv[4];
1467   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1468   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1469   info->fill_ratio_needed = 0;
1470   info->factor_mallocs    = 0;
1471   PetscFunctionReturn(PETSC_SUCCESS);
1472 }
1473 
1474 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1475 {
1476   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1477   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1478 
1479   PetscFunctionBegin;
1480   switch (op) {
1481   case MAT_NEW_NONZERO_LOCATIONS:
1482   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1483   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1484   case MAT_KEEP_NONZERO_PATTERN:
1485   case MAT_SUBMAT_SINGLEIS:
1486   case MAT_NEW_NONZERO_LOCATION_ERR:
1487     MatCheckPreallocated(A, 1);
1488     PetscCall(MatSetOption(a->A, op, flg));
1489     PetscCall(MatSetOption(a->B, op, flg));
1490     break;
1491   case MAT_ROW_ORIENTED:
1492     MatCheckPreallocated(A, 1);
1493     a->roworiented = flg;
1494 
1495     PetscCall(MatSetOption(a->A, op, flg));
1496     PetscCall(MatSetOption(a->B, op, flg));
1497     break;
1498   case MAT_FORCE_DIAGONAL_ENTRIES:
1499   case MAT_SORTED_FULL:
1500     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1501     break;
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503     a->donotstash = flg;
1504     break;
1505   case MAT_USE_HASH_TABLE:
1506     a->ht_flag = flg;
1507     break;
1508   case MAT_HERMITIAN:
1509     MatCheckPreallocated(A, 1);
1510     PetscCall(MatSetOption(a->A, op, flg));
1511 #if defined(PETSC_USE_COMPLEX)
1512     if (flg) { /* need different mat-vec ops */
1513       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1514       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1515       A->ops->multtranspose    = NULL;
1516       A->ops->multtransposeadd = NULL;
1517       A->symmetric             = PETSC_BOOL3_FALSE;
1518     }
1519 #endif
1520     break;
1521   case MAT_SPD:
1522   case MAT_SYMMETRIC:
1523     MatCheckPreallocated(A, 1);
1524     PetscCall(MatSetOption(a->A, op, flg));
1525 #if defined(PETSC_USE_COMPLEX)
1526     if (flg) { /* restore to use default mat-vec ops */
1527       A->ops->mult             = MatMult_MPISBAIJ;
1528       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1529       A->ops->multtranspose    = MatMult_MPISBAIJ;
1530       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1531     }
1532 #endif
1533     break;
1534   case MAT_STRUCTURALLY_SYMMETRIC:
1535     MatCheckPreallocated(A, 1);
1536     PetscCall(MatSetOption(a->A, op, flg));
1537     break;
1538   case MAT_SYMMETRY_ETERNAL:
1539   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1540     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1541     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1542     break;
1543   case MAT_SPD_ETERNAL:
1544     break;
1545   case MAT_IGNORE_LOWER_TRIANGULAR:
1546     aA->ignore_ltriangular = flg;
1547     break;
1548   case MAT_ERROR_LOWER_TRIANGULAR:
1549     aA->ignore_ltriangular = flg;
1550     break;
1551   case MAT_GETROW_UPPERTRIANGULAR:
1552     aA->getrow_utriangular = flg;
1553     break;
1554   default:
1555     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1556   }
1557   PetscFunctionReturn(PETSC_SUCCESS);
1558 }
1559 
1560 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1561 {
1562   PetscFunctionBegin;
1563   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1564   if (reuse == MAT_INITIAL_MATRIX) {
1565     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1566   } else if (reuse == MAT_REUSE_MATRIX) {
1567     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1568   }
1569   PetscFunctionReturn(PETSC_SUCCESS);
1570 }
1571 
1572 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1573 {
1574   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1575   Mat           a = baij->A, b = baij->B;
1576   PetscInt      nv, m, n;
1577   PetscBool     flg;
1578 
1579   PetscFunctionBegin;
1580   if (ll != rr) {
1581     PetscCall(VecEqual(ll, rr, &flg));
1582     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1583   }
1584   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1585 
1586   PetscCall(MatGetLocalSize(mat, &m, &n));
1587   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1588 
1589   PetscCall(VecGetLocalSize(rr, &nv));
1590   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1591 
1592   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1593 
1594   /* left diagonalscale the off-diagonal part */
1595   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596 
1597   /* scale the diagonal part */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   /* right diagonalscale the off-diagonal part */
1601   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1607 {
1608   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatSetUnfactored(a->A));
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1616 
1617 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1618 {
1619   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1620   Mat           a, b, c, d;
1621   PetscBool     flg;
1622 
1623   PetscFunctionBegin;
1624   a = matA->A;
1625   b = matA->B;
1626   c = matB->A;
1627   d = matB->B;
1628 
1629   PetscCall(MatEqual(a, c, &flg));
1630   if (flg) PetscCall(MatEqual(b, d, &flg));
1631   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1636 {
1637   PetscBool isbaij;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1641   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1642   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1643   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1644     PetscCall(MatGetRowUpperTriangular(A));
1645     PetscCall(MatCopy_Basic(A, B, str));
1646     PetscCall(MatRestoreRowUpperTriangular(A));
1647   } else {
1648     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1649     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1650 
1651     PetscCall(MatCopy(a->A, b->A, str));
1652     PetscCall(MatCopy(a->B, b->B, str));
1653   }
1654   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659 {
1660   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1661   PetscBLASInt  bnz, one                          = 1;
1662   Mat_SeqSBAIJ *xa, *ya;
1663   Mat_SeqBAIJ  *xb, *yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1669     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1670     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1672     xb = (Mat_SeqBAIJ *)xx->B->data;
1673     yb = (Mat_SeqBAIJ *)yy->B->data;
1674     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1676     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1678     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1679     PetscCall(MatAXPY_Basic(Y, a, X, str));
1680     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1681   } else {
1682     Mat       B;
1683     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1684     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1685     PetscCall(MatGetRowUpperTriangular(X));
1686     PetscCall(MatGetRowUpperTriangular(Y));
1687     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1688     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1689     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1690     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1691     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1692     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1693     PetscCall(MatSetType(B, MATMPISBAIJ));
1694     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1695     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1696     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1697     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1698     PetscCall(MatHeaderMerge(Y, &B));
1699     PetscCall(PetscFree(nnz_d));
1700     PetscCall(PetscFree(nnz_o));
1701     PetscCall(MatRestoreRowUpperTriangular(X));
1702     PetscCall(MatRestoreRowUpperTriangular(Y));
1703   }
1704   PetscFunctionReturn(PETSC_SUCCESS);
1705 }
1706 
1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708 {
1709   PetscInt  i;
1710   PetscBool flg;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714   for (i = 0; i < n; i++) {
1715     PetscCall(ISEqual(irow[i], icol[i], &flg));
1716     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1717   }
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722 {
1723   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1724   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1725 
1726   PetscFunctionBegin;
1727   if (!Y->preallocated) {
1728     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1729   } else if (!aij->nz) {
1730     PetscInt nonew = aij->nonew;
1731     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732     aij->nonew = nonew;
1733   }
1734   PetscCall(MatShift_Basic(Y, a));
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739 {
1740   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1744   PetscCall(MatMissingDiagonal(a->A, missing, d));
1745   if (d) {
1746     PetscInt rstart;
1747     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1748     *d += rstart / A->rmap->bs;
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754 {
1755   PetscFunctionBegin;
1756   *a = ((Mat_MPISBAIJ *)A->data)->A;
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1761                                        MatGetRow_MPISBAIJ,
1762                                        MatRestoreRow_MPISBAIJ,
1763                                        MatMult_MPISBAIJ,
1764                                        /*  4*/ MatMultAdd_MPISBAIJ,
1765                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1766                                        MatMultAdd_MPISBAIJ,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                        /* 10*/ NULL,
1771                                        NULL,
1772                                        NULL,
1773                                        MatSOR_MPISBAIJ,
1774                                        MatTranspose_MPISBAIJ,
1775                                        /* 15*/ MatGetInfo_MPISBAIJ,
1776                                        MatEqual_MPISBAIJ,
1777                                        MatGetDiagonal_MPISBAIJ,
1778                                        MatDiagonalScale_MPISBAIJ,
1779                                        MatNorm_MPISBAIJ,
1780                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1781                                        MatAssemblyEnd_MPISBAIJ,
1782                                        MatSetOption_MPISBAIJ,
1783                                        MatZeroEntries_MPISBAIJ,
1784                                        /* 24*/ NULL,
1785                                        NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        /* 29*/ MatSetUp_MPI_Hash,
1790                                        NULL,
1791                                        NULL,
1792                                        MatGetDiagonalBlock_MPISBAIJ,
1793                                        NULL,
1794                                        /* 34*/ MatDuplicate_MPISBAIJ,
1795                                        NULL,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        /* 39*/ MatAXPY_MPISBAIJ,
1800                                        MatCreateSubMatrices_MPISBAIJ,
1801                                        MatIncreaseOverlap_MPISBAIJ,
1802                                        MatGetValues_MPISBAIJ,
1803                                        MatCopy_MPISBAIJ,
1804                                        /* 44*/ NULL,
1805                                        MatScale_MPISBAIJ,
1806                                        MatShift_MPISBAIJ,
1807                                        NULL,
1808                                        NULL,
1809                                        /* 49*/ NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        /* 54*/ NULL,
1815                                        NULL,
1816                                        MatSetUnfactored_MPISBAIJ,
1817                                        NULL,
1818                                        MatSetValuesBlocked_MPISBAIJ,
1819                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1820                                        NULL,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        /* 64*/ NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1830                                        NULL,
1831                                        MatConvert_MPISBAIJ_Basic,
1832                                        NULL,
1833                                        NULL,
1834                                        /* 74*/ NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        /* 79*/ NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        MatLoad_MPISBAIJ,
1844                                        /* 84*/ NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        /* 89*/ NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        /* 94*/ NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        /* 99*/ NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        MatConjugate_MPISBAIJ,
1863                                        NULL,
1864                                        /*104*/ NULL,
1865                                        MatRealPart_MPISBAIJ,
1866                                        MatImaginaryPart_MPISBAIJ,
1867                                        MatGetRowUpperTriangular_MPISBAIJ,
1868                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1869                                        /*109*/ NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        MatMissingDiagonal_MPISBAIJ,
1874                                        /*114*/ NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        /*119*/ NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        /*124*/ NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        /*129*/ NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        /*134*/ NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        /*139*/ MatSetBlockSizes_Default,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        /*150*/ NULL,
1911                                        NULL};
1912 
1913 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1914 {
1915   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1916   PetscInt      i, mbs, Mbs;
1917   PetscMPIInt   size;
1918 
1919   PetscFunctionBegin;
1920   if (B->hash_active) {
1921     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
1922     B->hash_active = PETSC_FALSE;
1923   }
1924   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1925   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1926   PetscCall(PetscLayoutSetUp(B->rmap));
1927   PetscCall(PetscLayoutSetUp(B->cmap));
1928   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1929   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1930   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1931 
1932   mbs = B->rmap->n / bs;
1933   Mbs = B->rmap->N / bs;
1934   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1935 
1936   B->rmap->bs = bs;
1937   b->bs2      = bs * bs;
1938   b->mbs      = mbs;
1939   b->Mbs      = Mbs;
1940   b->nbs      = B->cmap->n / bs;
1941   b->Nbs      = B->cmap->N / bs;
1942 
1943   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1944   b->rstartbs = B->rmap->rstart / bs;
1945   b->rendbs   = B->rmap->rend / bs;
1946 
1947   b->cstartbs = B->cmap->rstart / bs;
1948   b->cendbs   = B->cmap->rend / bs;
1949 
1950 #if defined(PETSC_USE_CTABLE)
1951   PetscCall(PetscHMapIDestroy(&b->colmap));
1952 #else
1953   PetscCall(PetscFree(b->colmap));
1954 #endif
1955   PetscCall(PetscFree(b->garray));
1956   PetscCall(VecDestroy(&b->lvec));
1957   PetscCall(VecScatterDestroy(&b->Mvctx));
1958   PetscCall(VecDestroy(&b->slvec0));
1959   PetscCall(VecDestroy(&b->slvec0b));
1960   PetscCall(VecDestroy(&b->slvec1));
1961   PetscCall(VecDestroy(&b->slvec1a));
1962   PetscCall(VecDestroy(&b->slvec1b));
1963   PetscCall(VecScatterDestroy(&b->sMvctx));
1964 
1965   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1966   PetscCall(MatDestroy(&b->B));
1967   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1968   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1969   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1970 
1971   PetscCall(MatDestroy(&b->A));
1972   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1973   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1974   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1975 
1976   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1977   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1978 
1979   B->preallocated  = PETSC_TRUE;
1980   B->was_assembled = PETSC_FALSE;
1981   B->assembled     = PETSC_FALSE;
1982   PetscFunctionReturn(PETSC_SUCCESS);
1983 }
1984 
1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1986 {
1987   PetscInt        m, rstart, cend;
1988   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1989   const PetscInt *JJ          = NULL;
1990   PetscScalar    *values      = NULL;
1991   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1992   PetscBool       nooffprocentries;
1993 
1994   PetscFunctionBegin;
1995   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1996   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1997   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1998   PetscCall(PetscLayoutSetUp(B->rmap));
1999   PetscCall(PetscLayoutSetUp(B->cmap));
2000   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2001   m      = B->rmap->n / bs;
2002   rstart = B->rmap->rstart / bs;
2003   cend   = B->cmap->rend / bs;
2004 
2005   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2006   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2007   for (i = 0; i < m; i++) {
2008     nz = ii[i + 1] - ii[i];
2009     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2010     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2011     JJ = jj + ii[i];
2012     bd = 0;
2013     for (j = 0; j < nz; j++) {
2014       if (*JJ >= i + rstart) break;
2015       JJ++;
2016       bd++;
2017     }
2018     d = 0;
2019     for (; j < nz; j++) {
2020       if (*JJ++ >= cend) break;
2021       d++;
2022     }
2023     d_nnz[i] = d;
2024     o_nnz[i] = nz - d - bd;
2025     nz       = nz - bd;
2026     nz_max   = PetscMax(nz_max, nz);
2027   }
2028   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2029   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2030   PetscCall(PetscFree2(d_nnz, o_nnz));
2031 
2032   values = (PetscScalar *)V;
2033   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2034   for (i = 0; i < m; i++) {
2035     PetscInt        row   = i + rstart;
2036     PetscInt        ncols = ii[i + 1] - ii[i];
2037     const PetscInt *icols = jj + ii[i];
2038     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2039       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2040       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2041     } else { /* block ordering does not match so we can only insert one block at a time. */
2042       PetscInt j;
2043       for (j = 0; j < ncols; j++) {
2044         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2045         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2046       }
2047     }
2048   }
2049 
2050   if (!V) PetscCall(PetscFree(values));
2051   nooffprocentries    = B->nooffprocentries;
2052   B->nooffprocentries = PETSC_TRUE;
2053   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2054   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2055   B->nooffprocentries = nooffprocentries;
2056 
2057   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2058   PetscFunctionReturn(PETSC_SUCCESS);
2059 }
2060 
2061 /*MC
2062    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2063    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2064    the matrix is stored.
2065 
2066    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2067    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2068 
2069    Options Database Key:
2070 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2071 
2072    Level: beginner
2073 
2074    Note:
2075      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2076      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2077 
2078 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2079 M*/
2080 
2081 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2082 {
2083   Mat_MPISBAIJ *b;
2084   PetscBool     flg = PETSC_FALSE;
2085 
2086   PetscFunctionBegin;
2087   PetscCall(PetscNew(&b));
2088   B->data = (void *)b;
2089   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2090 
2091   B->ops->destroy = MatDestroy_MPISBAIJ;
2092   B->ops->view    = MatView_MPISBAIJ;
2093   B->assembled    = PETSC_FALSE;
2094   B->insertmode   = NOT_SET_VALUES;
2095 
2096   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2097   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2098 
2099   /* build local table of row and column ownerships */
2100   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2101 
2102   /* build cache for off array entries formed */
2103   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2104 
2105   b->donotstash  = PETSC_FALSE;
2106   b->colmap      = NULL;
2107   b->garray      = NULL;
2108   b->roworiented = PETSC_TRUE;
2109 
2110   /* stuff used in block assembly */
2111   b->barray = NULL;
2112 
2113   /* stuff used for matrix vector multiply */
2114   b->lvec    = NULL;
2115   b->Mvctx   = NULL;
2116   b->slvec0  = NULL;
2117   b->slvec0b = NULL;
2118   b->slvec1  = NULL;
2119   b->slvec1a = NULL;
2120   b->slvec1b = NULL;
2121   b->sMvctx  = NULL;
2122 
2123   /* stuff for MatGetRow() */
2124   b->rowindices   = NULL;
2125   b->rowvalues    = NULL;
2126   b->getrowactive = PETSC_FALSE;
2127 
2128   /* hash table stuff */
2129   b->ht           = NULL;
2130   b->hd           = NULL;
2131   b->ht_size      = 0;
2132   b->ht_flag      = PETSC_FALSE;
2133   b->ht_fact      = 0;
2134   b->ht_total_ct  = 0;
2135   b->ht_insert_ct = 0;
2136 
2137   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2138   b->ijonly = PETSC_FALSE;
2139 
2140   b->in_loc = NULL;
2141   b->v_loc  = NULL;
2142   b->n_loc  = 0;
2143 
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2148 #if defined(PETSC_HAVE_ELEMENTAL)
2149   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2150 #endif
2151 #if defined(PETSC_HAVE_SCALAPACK)
2152   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2153 #endif
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2155   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2156 
2157   B->symmetric                   = PETSC_BOOL3_TRUE;
2158   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2159   B->symmetry_eternal            = PETSC_TRUE;
2160   B->structural_symmetry_eternal = PETSC_TRUE;
2161 #if defined(PETSC_USE_COMPLEX)
2162   B->hermitian = PETSC_BOOL3_FALSE;
2163 #else
2164   B->hermitian = PETSC_BOOL3_TRUE;
2165 #endif
2166 
2167   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2168   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2169   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2170   if (flg) {
2171     PetscReal fact = 1.39;
2172     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2173     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2174     if (fact <= 1.0) fact = 1.39;
2175     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2176     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2177   }
2178   PetscOptionsEnd();
2179   PetscFunctionReturn(PETSC_SUCCESS);
2180 }
2181 
2182 /*MC
2183    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2184 
2185    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2186    and `MATMPISBAIJ` otherwise.
2187 
2188    Options Database Key:
2189 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2190 
2191   Level: beginner
2192 
2193 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2194 M*/
2195 
2196 /*@C
2197    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2198    the user should preallocate the matrix storage by setting the parameters
2199    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2200    performance can be increased by more than a factor of 50.
2201 
2202    Collective
2203 
2204    Input Parameters:
2205 +  B - the matrix
2206 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2207           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2208 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2209            submatrix  (same for all local rows)
2210 .  d_nnz - array containing the number of block nonzeros in the various block rows
2211            in the upper triangular and diagonal part of the in diagonal portion of the local
2212            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2213            for the diagonal entry and set a value even if it is zero.
2214 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2215            submatrix (same for all local rows).
2216 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2217            off-diagonal portion of the local submatrix that is right of the diagonal
2218            (possibly different for each block row) or `NULL`.
2219 
2220    Options Database Keys:
2221 +   -mat_no_unroll - uses code that does not unroll the loops in the
2222                      block calculations (much slower)
2223 -   -mat_block_size - size of the blocks to use
2224 
2225    Level: intermediate
2226 
2227    Notes:
2228 
2229    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2230    than it must be used on all processors that share the object for that argument.
2231 
2232    If the *_nnz parameter is given then the *_nz parameter is ignored
2233 
2234    Storage Information:
2235    For a square global matrix we define each processor's diagonal portion
2236    to be its local rows and the corresponding columns (a square submatrix);
2237    each processor's off-diagonal portion encompasses the remainder of the
2238    local matrix (a rectangular submatrix).
2239 
2240    The user can specify preallocated storage for the diagonal part of
2241    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2242    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2243    memory allocation.  Likewise, specify preallocated storage for the
2244    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2245 
2246    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2247    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2248    You can also run with the option `-info` and look for messages with the string
2249    malloc in them to see if additional memory allocation was needed.
2250 
2251    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2252    the figure below we depict these three local rows and all columns (0-11).
2253 
2254 .vb
2255            0 1 2 3 4 5 6 7 8 9 10 11
2256           --------------------------
2257    row 3  |. . . d d d o o o o  o  o
2258    row 4  |. . . d d d o o o o  o  o
2259    row 5  |. . . d d d o o o o  o  o
2260           --------------------------
2261 .ve
2262 
2263    Thus, any entries in the d locations are stored in the d (diagonal)
2264    submatrix, and any entries in the o locations are stored in the
2265    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2266    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2267 
2268    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2269    plus the diagonal part of the d matrix,
2270    and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2271 
2272    In general, for PDE problems in which most nonzeros are near the diagonal,
2273    one expects `d_nz` >> `o_nz`.
2274 
2275 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2276 @*/
2277 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2278 {
2279   PetscFunctionBegin;
2280   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2281   PetscValidType(B, 1);
2282   PetscValidLogicalCollectiveInt(B, bs, 2);
2283   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2284   PetscFunctionReturn(PETSC_SUCCESS);
2285 }
2286 
2287 /*@C
2288    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2289    (block compressed row).  For good matrix assembly performance
2290    the user should preallocate the matrix storage by setting the parameters
2291    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2292 
2293    Collective
2294 
2295    Input Parameters:
2296 +  comm - MPI communicator
2297 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2298           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2299 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2300            This value should be the same as the local size used in creating the
2301            y vector for the matrix-vector product y = Ax.
2302 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2303            This value should be the same as the local size used in creating the
2304            x vector for the matrix-vector product y = Ax.
2305 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2306 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2307 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2308            submatrix (same for all local rows)
2309 .  d_nnz - array containing the number of block nonzeros in the various block rows
2310            in the upper triangular portion of the in diagonal portion of the local
2311            (possibly different for each block block row) or `NULL`.
2312            If you plan to factor the matrix you must leave room for the diagonal entry and
2313            set its value even if it is zero.
2314 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2315            submatrix (same for all local rows).
2316 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2317            off-diagonal portion of the local submatrix (possibly different for
2318            each block row) or `NULL`.
2319 
2320    Output Parameter:
2321 .  A - the matrix
2322 
2323    Options Database Keys:
2324 +   -mat_no_unroll - uses code that does not unroll the loops in the
2325                      block calculations (much slower)
2326 .   -mat_block_size - size of the blocks to use
2327 -   -mat_mpi - use the parallel matrix data structures even on one processor
2328                (defaults to using SeqBAIJ format on one processor)
2329 
2330    Level: intermediate
2331 
2332    Notes:
2333    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2334    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2335    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2336 
2337    The number of rows and columns must be divisible by blocksize.
2338    This matrix type does not support complex Hermitian operation.
2339 
2340    The user MUST specify either the local or global matrix dimensions
2341    (possibly both).
2342 
2343    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344    than it must be used on all processors that share the object for that argument.
2345 
2346    If the *_nnz parameter is given then the *_nz parameter is ignored
2347 
2348    Storage Information:
2349    For a square global matrix we define each processor's diagonal portion
2350    to be its local rows and the corresponding columns (a square submatrix);
2351    each processor's off-diagonal portion encompasses the remainder of the
2352    local matrix (a rectangular submatrix).
2353 
2354    The user can specify preallocated storage for the diagonal part of
2355    the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2356    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2357    memory allocation. Likewise, specify preallocated storage for the
2358    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2359 
2360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361    the figure below we depict these three local rows and all columns (0-11).
2362 
2363 .vb
2364            0 1 2 3 4 5 6 7 8 9 10 11
2365           --------------------------
2366    row 3  |. . . d d d o o o o  o  o
2367    row 4  |. . . d d d o o o o  o  o
2368    row 5  |. . . d d d o o o o  o  o
2369           --------------------------
2370 .ve
2371 
2372    Thus, any entries in the d locations are stored in the d (diagonal)
2373    submatrix, and any entries in the o locations are stored in the
2374    o (off-diagonal) submatrix. Note that the d matrix is stored in
2375    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2376 
2377    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2378    plus the diagonal part of the d matrix,
2379    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects `d_nz` >> `o_nz`.
2382 
2383 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2384 @*/
2385 
2386 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2387 {
2388   PetscMPIInt size;
2389 
2390   PetscFunctionBegin;
2391   PetscCall(MatCreate(comm, A));
2392   PetscCall(MatSetSizes(*A, m, n, M, N));
2393   PetscCallMPI(MPI_Comm_size(comm, &size));
2394   if (size > 1) {
2395     PetscCall(MatSetType(*A, MATMPISBAIJ));
2396     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2397   } else {
2398     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2399     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2400   }
2401   PetscFunctionReturn(PETSC_SUCCESS);
2402 }
2403 
2404 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2405 {
2406   Mat           mat;
2407   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2408   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2409   PetscScalar  *array;
2410 
2411   PetscFunctionBegin;
2412   *newmat = NULL;
2413 
2414   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2415   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2416   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2417   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2418   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2419 
2420   mat->factortype   = matin->factortype;
2421   mat->preallocated = PETSC_TRUE;
2422   mat->assembled    = PETSC_TRUE;
2423   mat->insertmode   = NOT_SET_VALUES;
2424 
2425   a      = (Mat_MPISBAIJ *)mat->data;
2426   a->bs2 = oldmat->bs2;
2427   a->mbs = oldmat->mbs;
2428   a->nbs = oldmat->nbs;
2429   a->Mbs = oldmat->Mbs;
2430   a->Nbs = oldmat->Nbs;
2431 
2432   a->size         = oldmat->size;
2433   a->rank         = oldmat->rank;
2434   a->donotstash   = oldmat->donotstash;
2435   a->roworiented  = oldmat->roworiented;
2436   a->rowindices   = NULL;
2437   a->rowvalues    = NULL;
2438   a->getrowactive = PETSC_FALSE;
2439   a->barray       = NULL;
2440   a->rstartbs     = oldmat->rstartbs;
2441   a->rendbs       = oldmat->rendbs;
2442   a->cstartbs     = oldmat->cstartbs;
2443   a->cendbs       = oldmat->cendbs;
2444 
2445   /* hash table stuff */
2446   a->ht           = NULL;
2447   a->hd           = NULL;
2448   a->ht_size      = 0;
2449   a->ht_flag      = oldmat->ht_flag;
2450   a->ht_fact      = oldmat->ht_fact;
2451   a->ht_total_ct  = 0;
2452   a->ht_insert_ct = 0;
2453 
2454   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2455   if (oldmat->colmap) {
2456 #if defined(PETSC_USE_CTABLE)
2457     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2458 #else
2459     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2460     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2461 #endif
2462   } else a->colmap = NULL;
2463 
2464   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2465     PetscCall(PetscMalloc1(len, &a->garray));
2466     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2467   } else a->garray = NULL;
2468 
2469   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2470   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2471   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2472 
2473   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2474   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2475 
2476   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2477   PetscCall(VecGetArray(a->slvec1, &array));
2478   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2479   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2480   PetscCall(VecRestoreArray(a->slvec1, &array));
2481   PetscCall(VecGetArray(a->slvec0, &array));
2482   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2483   PetscCall(VecRestoreArray(a->slvec0, &array));
2484 
2485   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2486   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2487   a->sMvctx = oldmat->sMvctx;
2488 
2489   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2490   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2491   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2492   *newmat = mat;
2493   PetscFunctionReturn(PETSC_SUCCESS);
2494 }
2495 
2496 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2497 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2498 
2499 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2500 {
2501   PetscBool isbinary;
2502 
2503   PetscFunctionBegin;
2504   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2505   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2506   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2507   PetscFunctionReturn(PETSC_SUCCESS);
2508 }
2509 
2510 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2511 {
2512   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2513   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2514   PetscReal     atmp;
2515   PetscReal    *work, *svalues, *rvalues;
2516   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2517   PetscMPIInt   rank, size;
2518   PetscInt     *rowners_bs, dest, count, source;
2519   PetscScalar  *va;
2520   MatScalar    *ba;
2521   MPI_Status    stat;
2522 
2523   PetscFunctionBegin;
2524   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2525   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2526   PetscCall(VecGetArray(v, &va));
2527 
2528   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2529   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2530 
2531   bs  = A->rmap->bs;
2532   mbs = a->mbs;
2533   Mbs = a->Mbs;
2534   ba  = b->a;
2535   bi  = b->i;
2536   bj  = b->j;
2537 
2538   /* find ownerships */
2539   rowners_bs = A->rmap->range;
2540 
2541   /* each proc creates an array to be distributed */
2542   PetscCall(PetscCalloc1(bs * Mbs, &work));
2543 
2544   /* row_max for B */
2545   if (rank != size - 1) {
2546     for (i = 0; i < mbs; i++) {
2547       ncols = bi[1] - bi[0];
2548       bi++;
2549       brow = bs * i;
2550       for (j = 0; j < ncols; j++) {
2551         bcol = bs * (*bj);
2552         for (kcol = 0; kcol < bs; kcol++) {
2553           col = bcol + kcol;           /* local col index */
2554           col += rowners_bs[rank + 1]; /* global col index */
2555           for (krow = 0; krow < bs; krow++) {
2556             atmp = PetscAbsScalar(*ba);
2557             ba++;
2558             row = brow + krow; /* local row index */
2559             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2560             if (work[col] < atmp) work[col] = atmp;
2561           }
2562         }
2563         bj++;
2564       }
2565     }
2566 
2567     /* send values to its owners */
2568     for (dest = rank + 1; dest < size; dest++) {
2569       svalues = work + rowners_bs[dest];
2570       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2571       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2572     }
2573   }
2574 
2575   /* receive values */
2576   if (rank) {
2577     rvalues = work;
2578     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2579     for (source = 0; source < rank; source++) {
2580       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2581       /* process values */
2582       for (i = 0; i < count; i++) {
2583         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2584       }
2585     }
2586   }
2587 
2588   PetscCall(VecRestoreArray(v, &va));
2589   PetscCall(PetscFree(work));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2594 {
2595   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2596   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2597   PetscScalar       *x, *ptr, *from;
2598   Vec                bb1;
2599   const PetscScalar *b;
2600 
2601   PetscFunctionBegin;
2602   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2603   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2604 
2605   if (flag == SOR_APPLY_UPPER) {
2606     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2607     PetscFunctionReturn(PETSC_SUCCESS);
2608   }
2609 
2610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2611     if (flag & SOR_ZERO_INITIAL_GUESS) {
2612       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2613       its--;
2614     }
2615 
2616     PetscCall(VecDuplicate(bb, &bb1));
2617     while (its--) {
2618       /* lower triangular part: slvec0b = - B^T*xx */
2619       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2620 
2621       /* copy xx into slvec0a */
2622       PetscCall(VecGetArray(mat->slvec0, &ptr));
2623       PetscCall(VecGetArray(xx, &x));
2624       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2625       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2626 
2627       PetscCall(VecScale(mat->slvec0, -1.0));
2628 
2629       /* copy bb into slvec1a */
2630       PetscCall(VecGetArray(mat->slvec1, &ptr));
2631       PetscCall(VecGetArrayRead(bb, &b));
2632       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2633       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2634 
2635       /* set slvec1b = 0 */
2636       PetscCall(VecSet(mat->slvec1b, 0.0));
2637 
2638       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2639       PetscCall(VecRestoreArray(xx, &x));
2640       PetscCall(VecRestoreArrayRead(bb, &b));
2641       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2642 
2643       /* upper triangular part: bb1 = bb1 - B*x */
2644       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2645 
2646       /* local diagonal sweep */
2647       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2648     }
2649     PetscCall(VecDestroy(&bb1));
2650   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2651     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2652   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2653     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2654   } else if (flag & SOR_EISENSTAT) {
2655     Vec                xx1;
2656     PetscBool          hasop;
2657     const PetscScalar *diag;
2658     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2659     PetscInt           i, n;
2660 
2661     if (!mat->xx1) {
2662       PetscCall(VecDuplicate(bb, &mat->xx1));
2663       PetscCall(VecDuplicate(bb, &mat->bb1));
2664     }
2665     xx1 = mat->xx1;
2666     bb1 = mat->bb1;
2667 
2668     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2669 
2670     if (!mat->diag) {
2671       /* this is wrong for same matrix with new nonzero values */
2672       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2673       PetscCall(MatGetDiagonal(matin, mat->diag));
2674     }
2675     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2676 
2677     if (hasop) {
2678       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2679       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2680     } else {
2681       /*
2682           These two lines are replaced by code that may be a bit faster for a good compiler
2683       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2684       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2685       */
2686       PetscCall(VecGetArray(mat->slvec1a, &sl));
2687       PetscCall(VecGetArrayRead(mat->diag, &diag));
2688       PetscCall(VecGetArrayRead(bb, &b));
2689       PetscCall(VecGetArray(xx, &x));
2690       PetscCall(VecGetLocalSize(xx, &n));
2691       if (omega == 1.0) {
2692         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2693         PetscCall(PetscLogFlops(2.0 * n));
2694       } else {
2695         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2696         PetscCall(PetscLogFlops(3.0 * n));
2697       }
2698       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2699       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2700       PetscCall(VecRestoreArrayRead(bb, &b));
2701       PetscCall(VecRestoreArray(xx, &x));
2702     }
2703 
2704     /* multiply off-diagonal portion of matrix */
2705     PetscCall(VecSet(mat->slvec1b, 0.0));
2706     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2707     PetscCall(VecGetArray(mat->slvec0, &from));
2708     PetscCall(VecGetArray(xx, &x));
2709     PetscCall(PetscArraycpy(from, x, bs * mbs));
2710     PetscCall(VecRestoreArray(mat->slvec0, &from));
2711     PetscCall(VecRestoreArray(xx, &x));
2712     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2713     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2714     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2715 
2716     /* local sweep */
2717     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2718     PetscCall(VecAXPY(xx, 1.0, xx1));
2719   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 /*@
2724      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2725          CSR format the local rows.
2726 
2727    Collective
2728 
2729    Input Parameters:
2730 +  comm - MPI communicator
2731 .  bs - the block size, only a block size of 1 is supported
2732 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2733 .  n - This value should be the same as the local size used in creating the
2734        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2735        calculated if `N` is given) For square matrices `n` is almost always `m`.
2736 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2737 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2738 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2739 .   j - column indices
2740 -   a - matrix values
2741 
2742    Output Parameter:
2743 .   mat - the matrix
2744 
2745    Level: intermediate
2746 
2747    Notes:
2748        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2749      thus you CANNOT change the matrix entries by changing the values of `a` after you have
2750      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2751 
2752        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2753 
2754 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2755           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2756 @*/
2757 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2758 {
2759   PetscFunctionBegin;
2760   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2761   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2762   PetscCall(MatCreate(comm, mat));
2763   PetscCall(MatSetSizes(*mat, m, n, M, N));
2764   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2765   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2766   PetscFunctionReturn(PETSC_SUCCESS);
2767 }
2768 
2769 /*@C
2770    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2771 
2772    Collective
2773 
2774    Input Parameters:
2775 +  B - the matrix
2776 .  bs - the block size
2777 .  i - the indices into `j` for the start of each local row (starts with zero)
2778 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2779 -  v - optional values in the matrix
2780 
2781    Level: advanced
2782 
2783    Notes:
2784    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2785    and usually the numerical values as well
2786 
2787    Any entries below the diagonal are ignored
2788 
2789 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2790 @*/
2791 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2792 {
2793   PetscFunctionBegin;
2794   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2795   PetscFunctionReturn(PETSC_SUCCESS);
2796 }
2797 
2798 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2799 {
2800   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2801   PetscInt    *indx;
2802   PetscScalar *values;
2803 
2804   PetscFunctionBegin;
2805   PetscCall(MatGetSize(inmat, &m, &N));
2806   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2807     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2808     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2809     PetscInt     *bindx, rmax = a->rmax, j;
2810     PetscMPIInt   rank, size;
2811 
2812     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2813     mbs = m / bs;
2814     Nbs = N / cbs;
2815     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2816     nbs = n / cbs;
2817 
2818     PetscCall(PetscMalloc1(rmax, &bindx));
2819     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2820 
2821     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2822     PetscCallMPI(MPI_Comm_rank(comm, &size));
2823     if (rank == size - 1) {
2824       /* Check sum(nbs) = Nbs */
2825       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2826     }
2827 
2828     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2829     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2830     for (i = 0; i < mbs; i++) {
2831       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2832       nnz = nnz / bs;
2833       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2834       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2835       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2836     }
2837     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2838     PetscCall(PetscFree(bindx));
2839 
2840     PetscCall(MatCreate(comm, outmat));
2841     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2842     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2843     PetscCall(MatSetType(*outmat, MATSBAIJ));
2844     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2845     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2846     MatPreallocateEnd(dnz, onz);
2847   }
2848 
2849   /* numeric phase */
2850   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2851   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2852 
2853   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2854   for (i = 0; i < m; i++) {
2855     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2856     Ii = i + rstart;
2857     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2858     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2859   }
2860   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2861   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2862   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2863   PetscFunctionReturn(PETSC_SUCCESS);
2864 }
2865