xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 966bd95a39c2334d2e2ce17ad22128f3c1861eeb)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237   a_noinsert:; \
238     ailen[brow] = nrow; \
239   } while (0)
240 
241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
242   do { \
243     brow = row / bs; \
244     rp   = bj + bi[brow]; \
245     ap   = ba + bs2 * bi[brow]; \
246     rmax = bimax[brow]; \
247     nrow = bilen[brow]; \
248     bcol = col / bs; \
249     ridx = row % bs; \
250     cidx = col % bs; \
251     low  = 0; \
252     high = nrow; \
253     while (high - low > 3) { \
254       t = (low + high) / 2; \
255       if (rp[t] > bcol) high = t; \
256       else low = t; \
257     } \
258     for (_i = low; _i < high; _i++) { \
259       if (rp[_i] > bcol) break; \
260       if (rp[_i] == bcol) { \
261         bap = ap + bs2 * _i + bs * cidx + ridx; \
262         if (addv == ADD_VALUES) *bap += value; \
263         else *bap = value; \
264         goto b_noinsert; \
265       } \
266     } \
267     if (b->nonew == 1) goto b_noinsert; \
268     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
269     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
270     N = nrow++ - 1; \
271     /* shift up all the later entries in this row */ \
272     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
273     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
274     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
275     rp[_i]                          = bcol; \
276     ap[bs2 * _i + bs * cidx + ridx] = value; \
277   b_noinsert:; \
278     bilen[brow] = nrow; \
279   } while (0)
280 
281 /* Only add/insert a(i,j) with i<=j (blocks).
282    Any a(i,j) with i>j input by user is ignored or generates an error
283 */
284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
285 {
286   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
287   MatScalar     value;
288   PetscBool     roworiented = baij->roworiented;
289   PetscInt      i, j, row, col;
290   PetscInt      rstart_orig = mat->rmap->rstart;
291   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
292   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
293 
294   /* Some Variables required in the macro */
295   Mat           A     = baij->A;
296   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
297   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
298   MatScalar    *aa = a->a;
299 
300   Mat          B     = baij->B;
301   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
302   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
303   MatScalar   *ba = b->a;
304 
305   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
306   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
307   MatScalar *ap, *bap;
308 
309   /* for stash */
310   PetscInt   n_loc, *in_loc = NULL;
311   MatScalar *v_loc = NULL;
312 
313   PetscFunctionBegin;
314   if (!baij->donotstash) {
315     if (n > baij->n_loc) {
316       PetscCall(PetscFree(baij->in_loc));
317       PetscCall(PetscFree(baij->v_loc));
318       PetscCall(PetscMalloc1(n, &baij->in_loc));
319       PetscCall(PetscMalloc1(n, &baij->v_loc));
320 
321       baij->n_loc = n;
322     }
323     in_loc = baij->in_loc;
324     v_loc  = baij->v_loc;
325   }
326 
327   for (i = 0; i < m; i++) {
328     if (im[i] < 0) continue;
329     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
330     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
331       row = im[i] - rstart_orig;                     /* local row index */
332       for (j = 0; j < n; j++) {
333         if (im[i] / bs > in[j] / bs) {
334           PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
335           continue; /* ignore lower triangular blocks */
336         }
337         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
338           col  = in[j] - cstart_orig;                    /* local col index */
339           brow = row / bs;
340           bcol = col / bs;
341           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
342           if (roworiented) value = v[i * n + j];
343           else value = v[i + j * m];
344           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
345           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
346         } else if (in[j] < 0) {
347           continue;
348         } else {
349           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
350           /* off-diag entry (B) */
351           if (mat->was_assembled) {
352             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
353 #if defined(PETSC_USE_CTABLE)
354             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
355             col = col - 1;
356 #else
357             col = baij->colmap[in[j] / bs] - 1;
358 #endif
359             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
360               PetscCall(MatDisAssemble_MPISBAIJ(mat));
361               col = in[j];
362               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
363               B     = baij->B;
364               b     = (Mat_SeqBAIJ *)B->data;
365               bimax = b->imax;
366               bi    = b->i;
367               bilen = b->ilen;
368               bj    = b->j;
369               ba    = b->a;
370             } else col += in[j] % bs;
371           } else col = in[j];
372           if (roworiented) value = v[i * n + j];
373           else value = v[i + j * m];
374           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
375           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
376         }
377       }
378     } else { /* off processor entry */
379       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
380       if (!baij->donotstash) {
381         mat->assembled = PETSC_FALSE;
382         n_loc          = 0;
383         for (j = 0; j < n; j++) {
384           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
385           in_loc[n_loc] = in[j];
386           if (roworiented) {
387             v_loc[n_loc] = v[i * n + j];
388           } else {
389             v_loc[n_loc] = v[j * m + i];
390           }
391           n_loc++;
392         }
393         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
394       }
395     }
396   }
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 
400 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
401 {
402   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
403   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
404   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
405   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
406   PetscBool          roworiented = a->roworiented;
407   const PetscScalar *value       = v;
408   MatScalar         *ap, *aa = a->a, *bap;
409 
410   PetscFunctionBegin;
411   if (col < row) {
412     PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
413     PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
414   }
415   rp    = aj + ai[row];
416   ap    = aa + bs2 * ai[row];
417   rmax  = imax[row];
418   nrow  = ailen[row];
419   value = v;
420   low   = 0;
421   high  = nrow;
422 
423   while (high - low > 7) {
424     t = (low + high) / 2;
425     if (rp[t] > col) high = t;
426     else low = t;
427   }
428   for (i = low; i < high; i++) {
429     if (rp[i] > col) break;
430     if (rp[i] == col) {
431       bap = ap + bs2 * i;
432       if (roworiented) {
433         if (is == ADD_VALUES) {
434           for (ii = 0; ii < bs; ii++) {
435             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
436           }
437         } else {
438           for (ii = 0; ii < bs; ii++) {
439             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
440           }
441         }
442       } else {
443         if (is == ADD_VALUES) {
444           for (ii = 0; ii < bs; ii++) {
445             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
446           }
447         } else {
448           for (ii = 0; ii < bs; ii++) {
449             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
450           }
451         }
452       }
453       goto noinsert2;
454     }
455   }
456   if (nonew == 1) goto noinsert2;
457   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
458   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
459   N = nrow++ - 1;
460   high++;
461   /* shift up all the later entries in this row */
462   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
463   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
464   rp[i] = col;
465   bap   = ap + bs2 * i;
466   if (roworiented) {
467     for (ii = 0; ii < bs; ii++) {
468       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
469     }
470   } else {
471     for (ii = 0; ii < bs; ii++) {
472       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
473     }
474   }
475 noinsert2:;
476   ailen[row] = nrow;
477   PetscFunctionReturn(PETSC_SUCCESS);
478 }
479 
480 /*
481    This routine is exactly duplicated in mpibaij.c
482 */
483 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
484 {
485   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
486   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
487   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
488   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
489   PetscBool          roworiented = a->roworiented;
490   const PetscScalar *value       = v;
491   MatScalar         *ap, *aa = a->a, *bap;
492 
493   PetscFunctionBegin;
494   rp    = aj + ai[row];
495   ap    = aa + bs2 * ai[row];
496   rmax  = imax[row];
497   nrow  = ailen[row];
498   low   = 0;
499   high  = nrow;
500   value = v;
501   while (high - low > 7) {
502     t = (low + high) / 2;
503     if (rp[t] > col) high = t;
504     else low = t;
505   }
506   for (i = low; i < high; i++) {
507     if (rp[i] > col) break;
508     if (rp[i] == col) {
509       bap = ap + bs2 * i;
510       if (roworiented) {
511         if (is == ADD_VALUES) {
512           for (ii = 0; ii < bs; ii++) {
513             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
514           }
515         } else {
516           for (ii = 0; ii < bs; ii++) {
517             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
518           }
519         }
520       } else {
521         if (is == ADD_VALUES) {
522           for (ii = 0; ii < bs; ii++, value += bs) {
523             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
524             bap += bs;
525           }
526         } else {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
529             bap += bs;
530           }
531         }
532       }
533       goto noinsert2;
534     }
535   }
536   if (nonew == 1) goto noinsert2;
537   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
538   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
539   N = nrow++ - 1;
540   high++;
541   /* shift up all the later entries in this row */
542   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
543   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
544   rp[i] = col;
545   bap   = ap + bs2 * i;
546   if (roworiented) {
547     for (ii = 0; ii < bs; ii++) {
548       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
549     }
550   } else {
551     for (ii = 0; ii < bs; ii++) {
552       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
553     }
554   }
555 noinsert2:;
556   ailen[row] = nrow;
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 /*
561     This routine could be optimized by removing the need for the block copy below and passing stride information
562   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
563 */
564 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
565 {
566   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
567   const MatScalar *value;
568   MatScalar       *barray      = baij->barray;
569   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
570   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
571   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
572   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
573 
574   PetscFunctionBegin;
575   if (!barray) {
576     PetscCall(PetscMalloc1(bs2, &barray));
577     baij->barray = barray;
578   }
579 
580   if (roworiented) {
581     stepval = (n - 1) * bs;
582   } else {
583     stepval = (m - 1) * bs;
584   }
585   for (i = 0; i < m; i++) {
586     if (im[i] < 0) continue;
587     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
588     if (im[i] >= rstart && im[i] < rend) {
589       row = im[i] - rstart;
590       for (j = 0; j < n; j++) {
591         if (im[i] > in[j]) {
592           PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
593           continue; /* ignore lower triangular blocks */
594         }
595         /* If NumCol = 1 then a copy is not required */
596         if (roworiented && n == 1) {
597           barray = (MatScalar *)v + i * bs2;
598         } else if ((!roworiented) && (m == 1)) {
599           barray = (MatScalar *)v + j * bs2;
600         } else { /* Here a copy is required */
601           if (roworiented) {
602             value = v + i * (stepval + bs) * bs + j * bs;
603           } else {
604             value = v + j * (stepval + bs) * bs + i * bs;
605           }
606           for (ii = 0; ii < bs; ii++, value += stepval) {
607             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
608           }
609           barray -= bs2;
610         }
611 
612         if (in[j] >= cstart && in[j] < cend) {
613           col = in[j] - cstart;
614           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
615         } else if (in[j] < 0) {
616           continue;
617         } else {
618           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
619           if (mat->was_assembled) {
620             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
621 
622 #if defined(PETSC_USE_CTABLE)
623             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
624             col = col < 1 ? -1 : (col - 1) / bs;
625 #else
626             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
627 #endif
628             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
629               PetscCall(MatDisAssemble_MPISBAIJ(mat));
630               col = in[j];
631             }
632           } else col = in[j];
633           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
634         }
635       }
636     } else {
637       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
651 {
652   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
653   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
654   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
655 
656   PetscFunctionBegin;
657   for (i = 0; i < m; i++) {
658     if (idxm[i] < 0) continue; /* negative row */
659     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
660     PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
661     row = idxm[i] - bsrstart;
662     for (j = 0; j < n; j++) {
663       if (idxn[j] < 0) continue; /* negative column */
664       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
665       if (idxn[j] >= bscstart && idxn[j] < bscend) {
666         col = idxn[j] - bscstart;
667         PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
668       } else {
669         if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
670 #if defined(PETSC_USE_CTABLE)
671         PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
672         data--;
673 #else
674         data = baij->colmap[idxn[j] / bs] - 1;
675 #endif
676         if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
677         else {
678           col = data + idxn[j] % bs;
679           PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
680         }
681       }
682     }
683   }
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
688 {
689   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
690   PetscReal     sum[2], *lnorm2;
691 
692   PetscFunctionBegin;
693   if (baij->size == 1) {
694     PetscCall(MatNorm(baij->A, type, norm));
695   } else {
696     if (type == NORM_FROBENIUS) {
697       PetscCall(PetscMalloc1(2, &lnorm2));
698       PetscCall(MatNorm(baij->A, type, lnorm2));
699       *lnorm2 = (*lnorm2) * (*lnorm2);
700       lnorm2++; /* squar power of norm(A) */
701       PetscCall(MatNorm(baij->B, type, lnorm2));
702       *lnorm2 = (*lnorm2) * (*lnorm2);
703       lnorm2--; /* squar power of norm(B) */
704       PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
705       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
706       PetscCall(PetscFree(lnorm2));
707     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
708       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
709       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
710       PetscReal    *rsum, vabs;
711       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
712       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
713       MatScalar    *v;
714 
715       PetscCall(PetscMalloc1(mat->cmap->N, &rsum));
716       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
717       /* Amat */
718       v  = amat->a;
719       jj = amat->j;
720       for (brow = 0; brow < mbs; brow++) {
721         grow = bs * (rstart + brow);
722         nz   = amat->i[brow + 1] - amat->i[brow];
723         for (bcol = 0; bcol < nz; bcol++) {
724           gcol = bs * (rstart + *jj);
725           jj++;
726           for (col = 0; col < bs; col++) {
727             for (row = 0; row < bs; row++) {
728               vabs = PetscAbsScalar(*v);
729               v++;
730               rsum[gcol + col] += vabs;
731               /* non-diagonal block */
732               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
733             }
734           }
735         }
736         PetscCall(PetscLogFlops(nz * bs * bs));
737       }
738       /* Bmat */
739       v  = bmat->a;
740       jj = bmat->j;
741       for (brow = 0; brow < mbs; brow++) {
742         grow = bs * (rstart + brow);
743         nz   = bmat->i[brow + 1] - bmat->i[brow];
744         for (bcol = 0; bcol < nz; bcol++) {
745           gcol = bs * garray[*jj];
746           jj++;
747           for (col = 0; col < bs; col++) {
748             for (row = 0; row < bs; row++) {
749               vabs = PetscAbsScalar(*v);
750               v++;
751               rsum[gcol + col] += vabs;
752               rsum[grow + row] += vabs;
753             }
754           }
755         }
756         PetscCall(PetscLogFlops(nz * bs * bs));
757       }
758       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
759       *norm = 0.0;
760       for (col = 0; col < mat->cmap->N; col++) {
761         if (rsum[col] > *norm) *norm = rsum[col];
762       }
763       PetscCall(PetscFree(rsum));
764     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
765   }
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
770 {
771   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
772   PetscInt      nstash, reallocs;
773 
774   PetscFunctionBegin;
775   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
776 
777   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
778   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
779   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
780   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
781   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
782   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
790   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
791   PetscInt     *row, *col;
792   PetscBool     all_assembled;
793   PetscMPIInt   n;
794   PetscBool     r1, r2, r3;
795   MatScalar    *val;
796 
797   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
798   PetscFunctionBegin;
799   if (!baij->donotstash && !mat->nooffprocentries) {
800     while (1) {
801       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
802       if (!flg) break;
803 
804       for (i = 0; i < n;) {
805         /* Now identify the consecutive vals belonging to the same row */
806         for (j = i, rstart = row[j]; j < n; j++) {
807           if (row[j] != rstart) break;
808         }
809         if (j < n) ncols = j - i;
810         else ncols = n - i;
811         /* Now assemble all these values with a single function call */
812         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
813         i = j;
814       }
815     }
816     PetscCall(MatStashScatterEnd_Private(&mat->stash));
817     /* Now process the block-stash. Since the values are stashed column-oriented,
818        set the row-oriented flag to column-oriented, and after MatSetValues()
819        restore the original flags */
820     r1 = baij->roworiented;
821     r2 = a->roworiented;
822     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
823 
824     baij->roworiented = PETSC_FALSE;
825     a->roworiented    = PETSC_FALSE;
826 
827     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
828     while (1) {
829       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
830       if (!flg) break;
831 
832       for (i = 0; i < n;) {
833         /* Now identify the consecutive vals belonging to the same row */
834         for (j = i, rstart = row[j]; j < n; j++) {
835           if (row[j] != rstart) break;
836         }
837         if (j < n) ncols = j - i;
838         else ncols = n - i;
839         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
840         i = j;
841       }
842     }
843     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
844 
845     baij->roworiented = r1;
846     a->roworiented    = r2;
847 
848     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
849   }
850 
851   PetscCall(MatAssemblyBegin(baij->A, mode));
852   PetscCall(MatAssemblyEnd(baij->A, mode));
853 
854   /* determine if any process has disassembled, if so we must
855      also disassemble ourselves, in order that we may reassemble. */
856   /*
857      if nonzero structure of submatrix B cannot change then we know that
858      no process disassembled thus we can skip this stuff
859   */
860   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
861     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
862     if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
863   }
864 
865   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
866   PetscCall(MatAssemblyBegin(baij->B, mode));
867   PetscCall(MatAssemblyEnd(baij->B, mode));
868 
869   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
870 
871   baij->rowvalues = NULL;
872 
873   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
875     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
876     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
877   }
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
882 #include <petscdraw.h>
883 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
884 {
885   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
886   PetscInt          bs   = mat->rmap->bs;
887   PetscMPIInt       rank = baij->rank;
888   PetscBool         isascii, isdraw;
889   PetscViewer       sviewer;
890   PetscViewerFormat format;
891 
892   PetscFunctionBegin;
893   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
894   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
895   if (isascii) {
896     PetscCall(PetscViewerGetFormat(viewer, &format));
897     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
898       MatInfo info;
899       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
900       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
901       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
902       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
903                                                    mat->rmap->bs, info.memory));
904       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
905       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
906       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
907       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
908       PetscCall(PetscViewerFlush(viewer));
909       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
910       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
911       PetscCall(VecScatterView(baij->Mvctx, viewer));
912       PetscFunctionReturn(PETSC_SUCCESS);
913     } else if (format == PETSC_VIEWER_ASCII_INFO) {
914       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
915       PetscFunctionReturn(PETSC_SUCCESS);
916     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
917       PetscFunctionReturn(PETSC_SUCCESS);
918     }
919   }
920 
921   if (isdraw) {
922     PetscDraw draw;
923     PetscBool isnull;
924     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
925     PetscCall(PetscDrawIsNull(draw, &isnull));
926     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
927   }
928 
929   {
930     /* assemble the entire matrix onto first processor. */
931     Mat           A;
932     Mat_SeqSBAIJ *Aloc;
933     Mat_SeqBAIJ  *Bloc;
934     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
935     MatScalar    *a;
936     const char   *matname;
937 
938     /* Should this be the same type as mat? */
939     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
940     if (rank == 0) {
941       PetscCall(MatSetSizes(A, M, N, M, N));
942     } else {
943       PetscCall(MatSetSizes(A, 0, 0, M, N));
944     }
945     PetscCall(MatSetType(A, MATMPISBAIJ));
946     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
947     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
948 
949     /* copy over the A part */
950     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
951     ai   = Aloc->i;
952     aj   = Aloc->j;
953     a    = Aloc->a;
954     PetscCall(PetscMalloc1(bs, &rvals));
955 
956     for (i = 0; i < mbs; i++) {
957       rvals[0] = bs * (baij->rstartbs + i);
958       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
959       for (j = ai[i]; j < ai[i + 1]; j++) {
960         col = (baij->cstartbs + aj[j]) * bs;
961         for (k = 0; k < bs; k++) {
962           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
963           col++;
964           a += bs;
965         }
966       }
967     }
968     /* copy over the B part */
969     Bloc = (Mat_SeqBAIJ *)baij->B->data;
970     ai   = Bloc->i;
971     aj   = Bloc->j;
972     a    = Bloc->a;
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = baij->garray[aj[j]] * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     PetscCall(PetscFree(rvals));
986     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
987     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
988     /*
989        Everyone has to call to draw the matrix since the graphics waits are
990        synchronized across all processors that share the PetscDraw object
991     */
992     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
993     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
994     if (rank == 0) {
995       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
996       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
997     }
998     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
999     PetscCall(MatDestroy(&A));
1000   }
1001   PetscFunctionReturn(PETSC_SUCCESS);
1002 }
1003 
1004 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1005 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1006 
1007 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1008 {
1009   PetscBool isascii, isdraw, issocket, isbinary;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1013   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1014   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1015   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1016   if (isascii || isdraw || issocket) {
1017     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1018   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 #if defined(PETSC_USE_COMPLEX)
1023 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1024 {
1025   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1026   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1027   PetscScalar       *from;
1028   const PetscScalar *x;
1029 
1030   PetscFunctionBegin;
1031   /* diagonal part */
1032   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1033   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1034   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1035   PetscCall(VecZeroEntries(a->slvec1b));
1036 
1037   /* subdiagonal part */
1038   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1039   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1040 
1041   /* copy x into the vec slvec0 */
1042   PetscCall(VecGetArray(a->slvec0, &from));
1043   PetscCall(VecGetArrayRead(xx, &x));
1044 
1045   PetscCall(PetscArraycpy(from, x, bs * mbs));
1046   PetscCall(VecRestoreArray(a->slvec0, &from));
1047   PetscCall(VecRestoreArrayRead(xx, &x));
1048 
1049   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1050   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1051   /* supperdiagonal part */
1052   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 #endif
1056 
1057 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1058 {
1059   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1060   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1061   PetscScalar       *from;
1062   const PetscScalar *x;
1063 
1064   PetscFunctionBegin;
1065   /* diagonal part */
1066   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1067   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1068   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1069   PetscCall(VecZeroEntries(a->slvec1b));
1070 
1071   /* subdiagonal part */
1072   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1073 
1074   /* copy x into the vec slvec0 */
1075   PetscCall(VecGetArray(a->slvec0, &from));
1076   PetscCall(VecGetArrayRead(xx, &x));
1077 
1078   PetscCall(PetscArraycpy(from, x, bs * mbs));
1079   PetscCall(VecRestoreArray(a->slvec0, &from));
1080   PetscCall(VecRestoreArrayRead(xx, &x));
1081 
1082   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1083   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1084   /* supperdiagonal part */
1085   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 #if PetscDefined(USE_COMPLEX)
1090 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1091 {
1092   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1093   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1094   PetscScalar       *from;
1095   const PetscScalar *x;
1096 
1097   PetscFunctionBegin;
1098   /* diagonal part */
1099   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1100   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1101   PetscCall(VecZeroEntries(a->slvec1b));
1102 
1103   /* subdiagonal part */
1104   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1105   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1106 
1107   /* copy x into the vec slvec0 */
1108   PetscCall(VecGetArray(a->slvec0, &from));
1109   PetscCall(VecGetArrayRead(xx, &x));
1110   PetscCall(PetscArraycpy(from, x, bs * mbs));
1111   PetscCall(VecRestoreArray(a->slvec0, &from));
1112 
1113   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1114   PetscCall(VecRestoreArrayRead(xx, &x));
1115   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1116 
1117   /* supperdiagonal part */
1118   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1119   PetscFunctionReturn(PETSC_SUCCESS);
1120 }
1121 #endif
1122 
1123 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1124 {
1125   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1126   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1127   PetscScalar       *from;
1128   const PetscScalar *x;
1129 
1130   PetscFunctionBegin;
1131   /* diagonal part */
1132   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1133   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1134   PetscCall(VecZeroEntries(a->slvec1b));
1135 
1136   /* subdiagonal part */
1137   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1138 
1139   /* copy x into the vec slvec0 */
1140   PetscCall(VecGetArray(a->slvec0, &from));
1141   PetscCall(VecGetArrayRead(xx, &x));
1142   PetscCall(PetscArraycpy(from, x, bs * mbs));
1143   PetscCall(VecRestoreArray(a->slvec0, &from));
1144 
1145   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1146   PetscCall(VecRestoreArrayRead(xx, &x));
1147   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1148 
1149   /* supperdiagonal part */
1150   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 /*
1155   This only works correctly for square matrices where the subblock A->A is the
1156    diagonal block
1157 */
1158 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1159 {
1160   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1161 
1162   PetscFunctionBegin;
1163   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1164   PetscCall(MatGetDiagonal(a->A, v));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1169 {
1170   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1171 
1172   PetscFunctionBegin;
1173   PetscCall(MatScale(a->A, aa));
1174   PetscCall(MatScale(a->B, aa));
1175   PetscFunctionReturn(PETSC_SUCCESS);
1176 }
1177 
1178 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1179 {
1180   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1181   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1182   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1183   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1184   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1185 
1186   PetscFunctionBegin;
1187   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1188   mat->getrowactive = PETSC_TRUE;
1189 
1190   if (!mat->rowvalues && (idx || v)) {
1191     /*
1192         allocate enough space to hold information from the longest row.
1193     */
1194     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1195     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1196     PetscInt      max = 1, mbs = mat->mbs, tmp;
1197     for (i = 0; i < mbs; i++) {
1198       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1199       if (max < tmp) max = tmp;
1200     }
1201     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1202   }
1203 
1204   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1205   lrow = row - brstart; /* local row index */
1206 
1207   pvA = &vworkA;
1208   pcA = &cworkA;
1209   pvB = &vworkB;
1210   pcB = &cworkB;
1211   if (!v) {
1212     pvA = NULL;
1213     pvB = NULL;
1214   }
1215   if (!idx) {
1216     pcA = NULL;
1217     if (!v) pcB = NULL;
1218   }
1219   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1220   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1221   nztot = nzA + nzB;
1222 
1223   cmap = mat->garray;
1224   if (v || idx) {
1225     if (nztot) {
1226       /* Sort by increasing column numbers, assuming A and B already sorted */
1227       PetscInt imark = -1;
1228       if (v) {
1229         *v = v_p = mat->rowvalues;
1230         for (i = 0; i < nzB; i++) {
1231           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1232           else break;
1233         }
1234         imark = i;
1235         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1236         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1237       }
1238       if (idx) {
1239         *idx = idx_p = mat->rowindices;
1240         if (imark > -1) {
1241           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1242         } else {
1243           for (i = 0; i < nzB; i++) {
1244             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1245             else break;
1246           }
1247           imark = i;
1248         }
1249         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1250         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1251       }
1252     } else {
1253       if (idx) *idx = NULL;
1254       if (v) *v = NULL;
1255     }
1256   }
1257   *nz = nztot;
1258   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1259   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1260   PetscFunctionReturn(PETSC_SUCCESS);
1261 }
1262 
1263 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1264 {
1265   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1266 
1267   PetscFunctionBegin;
1268   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1269   baij->getrowactive = PETSC_FALSE;
1270   PetscFunctionReturn(PETSC_SUCCESS);
1271 }
1272 
1273 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1274 {
1275   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1276   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1277 
1278   PetscFunctionBegin;
1279   aA->getrow_utriangular = PETSC_TRUE;
1280   PetscFunctionReturn(PETSC_SUCCESS);
1281 }
1282 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1283 {
1284   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1285   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1286 
1287   PetscFunctionBegin;
1288   aA->getrow_utriangular = PETSC_FALSE;
1289   PetscFunctionReturn(PETSC_SUCCESS);
1290 }
1291 
1292 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1293 {
1294   PetscFunctionBegin;
1295   if (PetscDefined(USE_COMPLEX)) {
1296     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1297 
1298     PetscCall(MatConjugate(a->A));
1299     PetscCall(MatConjugate(a->B));
1300   }
1301   PetscFunctionReturn(PETSC_SUCCESS);
1302 }
1303 
1304 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1305 {
1306   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1307 
1308   PetscFunctionBegin;
1309   PetscCall(MatRealPart(a->A));
1310   PetscCall(MatRealPart(a->B));
1311   PetscFunctionReturn(PETSC_SUCCESS);
1312 }
1313 
1314 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1315 {
1316   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1317 
1318   PetscFunctionBegin;
1319   PetscCall(MatImaginaryPart(a->A));
1320   PetscCall(MatImaginaryPart(a->B));
1321   PetscFunctionReturn(PETSC_SUCCESS);
1322 }
1323 
1324 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1325    Input: isrow       - distributed(parallel),
1326           iscol_local - locally owned (seq)
1327 */
1328 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1329 {
1330   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1331   const PetscInt *ptr1, *ptr2;
1332 
1333   PetscFunctionBegin;
1334   *flg = PETSC_FALSE;
1335   PetscCall(ISGetLocalSize(isrow, &sz1));
1336   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1337   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1338 
1339   PetscCall(ISGetIndices(isrow, &ptr1));
1340   PetscCall(ISGetIndices(iscol_local, &ptr2));
1341 
1342   PetscCall(PetscMalloc1(sz1, &a1));
1343   PetscCall(PetscMalloc1(sz2, &a2));
1344   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1345   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1346   PetscCall(PetscSortInt(sz1, a1));
1347   PetscCall(PetscSortInt(sz2, a2));
1348 
1349   nmatch = 0;
1350   k      = 0;
1351   for (i = 0; i < sz1; i++) {
1352     for (j = k; j < sz2; j++) {
1353       if (a1[i] == a2[j]) {
1354         k = j;
1355         nmatch++;
1356         break;
1357       }
1358     }
1359   }
1360   PetscCall(ISRestoreIndices(isrow, &ptr1));
1361   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1362   PetscCall(PetscFree(a1));
1363   PetscCall(PetscFree(a2));
1364   if (nmatch < sz1) {
1365     *flg = PETSC_FALSE;
1366   } else {
1367     *flg = PETSC_TRUE;
1368   }
1369   PetscFunctionReturn(PETSC_SUCCESS);
1370 }
1371 
1372 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1373 {
1374   Mat       C[2];
1375   IS        iscol_local, isrow_local;
1376   PetscInt  csize, csize_local, rsize;
1377   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1378 
1379   PetscFunctionBegin;
1380   PetscCall(ISGetLocalSize(iscol, &csize));
1381   PetscCall(ISGetLocalSize(isrow, &rsize));
1382   if (call == MAT_REUSE_MATRIX) {
1383     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1384     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1385   } else {
1386     PetscCall(ISAllGather(iscol, &iscol_local));
1387     PetscCall(ISSorted(iscol_local, &issorted));
1388     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1389   }
1390   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1391   if (!isequal) {
1392     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1393     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1394     if (!isidentity) {
1395       if (call == MAT_REUSE_MATRIX) {
1396         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1397         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1398       } else {
1399         PetscCall(ISAllGather(isrow, &isrow_local));
1400         PetscCall(ISSorted(isrow_local, &issorted));
1401         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1402       }
1403     }
1404   }
1405   /* now call MatCreateSubMatrix_MPIBAIJ() */
1406   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1407   if (!isequal && !isidentity) {
1408     if (call == MAT_INITIAL_MATRIX) {
1409       IS       intersect;
1410       PetscInt ni;
1411 
1412       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1413       PetscCall(ISGetLocalSize(intersect, &ni));
1414       PetscCall(ISDestroy(&intersect));
1415       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1416     }
1417     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1418     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1419     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1420     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1421     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1422     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1423     PetscCall(MatDestroy(C));
1424     PetscCall(MatDestroy(C + 1));
1425   }
1426   if (call == MAT_INITIAL_MATRIX) {
1427     if (!isequal && !isidentity) {
1428       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1429       PetscCall(ISDestroy(&isrow_local));
1430     }
1431     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1432     PetscCall(ISDestroy(&iscol_local));
1433   }
1434   PetscFunctionReturn(PETSC_SUCCESS);
1435 }
1436 
1437 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1438 {
1439   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1440 
1441   PetscFunctionBegin;
1442   PetscCall(MatZeroEntries(l->A));
1443   PetscCall(MatZeroEntries(l->B));
1444   PetscFunctionReturn(PETSC_SUCCESS);
1445 }
1446 
1447 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1448 {
1449   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1450   Mat            A = a->A, B = a->B;
1451   PetscLogDouble isend[5], irecv[5];
1452 
1453   PetscFunctionBegin;
1454   info->block_size = (PetscReal)matin->rmap->bs;
1455 
1456   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1457 
1458   isend[0] = info->nz_used;
1459   isend[1] = info->nz_allocated;
1460   isend[2] = info->nz_unneeded;
1461   isend[3] = info->memory;
1462   isend[4] = info->mallocs;
1463 
1464   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1465 
1466   isend[0] += info->nz_used;
1467   isend[1] += info->nz_allocated;
1468   isend[2] += info->nz_unneeded;
1469   isend[3] += info->memory;
1470   isend[4] += info->mallocs;
1471   if (flag == MAT_LOCAL) {
1472     info->nz_used      = isend[0];
1473     info->nz_allocated = isend[1];
1474     info->nz_unneeded  = isend[2];
1475     info->memory       = isend[3];
1476     info->mallocs      = isend[4];
1477   } else if (flag == MAT_GLOBAL_MAX) {
1478     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1479 
1480     info->nz_used      = irecv[0];
1481     info->nz_allocated = irecv[1];
1482     info->nz_unneeded  = irecv[2];
1483     info->memory       = irecv[3];
1484     info->mallocs      = irecv[4];
1485   } else if (flag == MAT_GLOBAL_SUM) {
1486     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1487 
1488     info->nz_used      = irecv[0];
1489     info->nz_allocated = irecv[1];
1490     info->nz_unneeded  = irecv[2];
1491     info->memory       = irecv[3];
1492     info->mallocs      = irecv[4];
1493   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1494   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1495   info->fill_ratio_needed = 0;
1496   info->factor_mallocs    = 0;
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
1500 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1501 {
1502   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1503   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1504 
1505   PetscFunctionBegin;
1506   switch (op) {
1507   case MAT_NEW_NONZERO_LOCATIONS:
1508   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1509   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1510   case MAT_KEEP_NONZERO_PATTERN:
1511   case MAT_NEW_NONZERO_LOCATION_ERR:
1512     MatCheckPreallocated(A, 1);
1513     PetscCall(MatSetOption(a->A, op, flg));
1514     PetscCall(MatSetOption(a->B, op, flg));
1515     break;
1516   case MAT_ROW_ORIENTED:
1517     MatCheckPreallocated(A, 1);
1518     a->roworiented = flg;
1519 
1520     PetscCall(MatSetOption(a->A, op, flg));
1521     PetscCall(MatSetOption(a->B, op, flg));
1522     break;
1523   case MAT_IGNORE_OFF_PROC_ENTRIES:
1524     a->donotstash = flg;
1525     break;
1526   case MAT_USE_HASH_TABLE:
1527     a->ht_flag = flg;
1528     break;
1529   case MAT_HERMITIAN:
1530     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1531 #if defined(PETSC_USE_COMPLEX)
1532     if (flg) { /* need different mat-vec ops */
1533       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1534       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1535       A->ops->multtranspose    = NULL;
1536       A->ops->multtransposeadd = NULL;
1537       A->symmetric             = PETSC_BOOL3_FALSE;
1538     }
1539 #endif
1540     break;
1541   case MAT_SPD:
1542   case MAT_SYMMETRIC:
1543     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1544 #if defined(PETSC_USE_COMPLEX)
1545     if (flg) { /* restore to use default mat-vec ops */
1546       A->ops->mult             = MatMult_MPISBAIJ;
1547       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1548       A->ops->multtranspose    = MatMult_MPISBAIJ;
1549       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1550     }
1551 #endif
1552     break;
1553   case MAT_STRUCTURALLY_SYMMETRIC:
1554     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1555     break;
1556   case MAT_IGNORE_LOWER_TRIANGULAR:
1557   case MAT_ERROR_LOWER_TRIANGULAR:
1558     aA->ignore_ltriangular = flg;
1559     break;
1560   case MAT_GETROW_UPPERTRIANGULAR:
1561     aA->getrow_utriangular = flg;
1562     break;
1563   default:
1564     break;
1565   }
1566   PetscFunctionReturn(PETSC_SUCCESS);
1567 }
1568 
1569 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1570 {
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1573   if (reuse == MAT_INITIAL_MATRIX) {
1574     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1575   } else if (reuse == MAT_REUSE_MATRIX) {
1576     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1577   }
1578   PetscFunctionReturn(PETSC_SUCCESS);
1579 }
1580 
1581 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1582 {
1583   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1584   Mat           a = baij->A, b = baij->B;
1585   PetscInt      nv, m, n;
1586   PetscBool     flg;
1587 
1588   PetscFunctionBegin;
1589   if (ll != rr) {
1590     PetscCall(VecEqual(ll, rr, &flg));
1591     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1592   }
1593   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1594 
1595   PetscCall(MatGetLocalSize(mat, &m, &n));
1596   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1597 
1598   PetscCall(VecGetLocalSize(rr, &nv));
1599   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1600 
1601   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602 
1603   /* left diagonalscale the off-diagonal part */
1604   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1605 
1606   /* scale the diagonal part */
1607   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1608 
1609   /* right diagonalscale the off-diagonal part */
1610   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1611   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1616 {
1617   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1618 
1619   PetscFunctionBegin;
1620   PetscCall(MatSetUnfactored(a->A));
1621   PetscFunctionReturn(PETSC_SUCCESS);
1622 }
1623 
1624 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1625 
1626 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1627 {
1628   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1629   Mat           a, b, c, d;
1630   PetscBool     flg;
1631 
1632   PetscFunctionBegin;
1633   a = matA->A;
1634   b = matA->B;
1635   c = matB->A;
1636   d = matB->B;
1637 
1638   PetscCall(MatEqual(a, c, &flg));
1639   if (flg) PetscCall(MatEqual(b, d, &flg));
1640   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
1644 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1645 {
1646   PetscBool isbaij;
1647 
1648   PetscFunctionBegin;
1649   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1650   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1651   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1652   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1653     PetscCall(MatGetRowUpperTriangular(A));
1654     PetscCall(MatCopy_Basic(A, B, str));
1655     PetscCall(MatRestoreRowUpperTriangular(A));
1656   } else {
1657     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1658     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1659 
1660     PetscCall(MatCopy(a->A, b->A, str));
1661     PetscCall(MatCopy(a->B, b->B, str));
1662   }
1663   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1664   PetscFunctionReturn(PETSC_SUCCESS);
1665 }
1666 
1667 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1668 {
1669   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1670   PetscBLASInt  bnz, one                          = 1;
1671   Mat_SeqSBAIJ *xa, *ya;
1672   Mat_SeqBAIJ  *xb, *yb;
1673 
1674   PetscFunctionBegin;
1675   if (str == SAME_NONZERO_PATTERN) {
1676     PetscScalar alpha = a;
1677     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1678     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1679     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1680     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1681     xb = (Mat_SeqBAIJ *)xx->B->data;
1682     yb = (Mat_SeqBAIJ *)yy->B->data;
1683     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1684     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1685     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1686   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1687     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1688     PetscCall(MatAXPY_Basic(Y, a, X, str));
1689     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1690   } else {
1691     Mat       B;
1692     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1693     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1694     PetscCall(MatGetRowUpperTriangular(X));
1695     PetscCall(MatGetRowUpperTriangular(Y));
1696     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1697     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1698     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1699     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1700     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1701     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1702     PetscCall(MatSetType(B, MATMPISBAIJ));
1703     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1704     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1705     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1706     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1707     PetscCall(MatHeaderMerge(Y, &B));
1708     PetscCall(PetscFree(nnz_d));
1709     PetscCall(PetscFree(nnz_o));
1710     PetscCall(MatRestoreRowUpperTriangular(X));
1711     PetscCall(MatRestoreRowUpperTriangular(Y));
1712   }
1713   PetscFunctionReturn(PETSC_SUCCESS);
1714 }
1715 
1716 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1717 {
1718   PetscInt  i;
1719   PetscBool flg;
1720 
1721   PetscFunctionBegin;
1722   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1723   for (i = 0; i < n; i++) {
1724     PetscCall(ISEqual(irow[i], icol[i], &flg));
1725     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1726   }
1727   PetscFunctionReturn(PETSC_SUCCESS);
1728 }
1729 
1730 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1731 {
1732   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1733   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1734 
1735   PetscFunctionBegin;
1736   if (!Y->preallocated) {
1737     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1738   } else if (!aij->nz) {
1739     PetscInt nonew = aij->nonew;
1740     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1741     aij->nonew = nonew;
1742   }
1743   PetscCall(MatShift_Basic(Y, a));
1744   PetscFunctionReturn(PETSC_SUCCESS);
1745 }
1746 
1747 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1748 {
1749   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1750 
1751   PetscFunctionBegin;
1752   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1753   PetscCall(MatMissingDiagonal(a->A, missing, d));
1754   if (d) {
1755     PetscInt rstart;
1756     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1757     *d += rstart / A->rmap->bs;
1758   }
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1763 {
1764   PetscFunctionBegin;
1765   *a = ((Mat_MPISBAIJ *)A->data)->A;
1766   PetscFunctionReturn(PETSC_SUCCESS);
1767 }
1768 
1769 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1770 {
1771   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1772 
1773   PetscFunctionBegin;
1774   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1775   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1776   PetscFunctionReturn(PETSC_SUCCESS);
1777 }
1778 
1779 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1780 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1781 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1782 
1783 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1784                                        MatGetRow_MPISBAIJ,
1785                                        MatRestoreRow_MPISBAIJ,
1786                                        MatMult_MPISBAIJ,
1787                                        /*  4*/ MatMultAdd_MPISBAIJ,
1788                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1789                                        MatMultAdd_MPISBAIJ,
1790                                        NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        /* 10*/ NULL,
1794                                        NULL,
1795                                        NULL,
1796                                        MatSOR_MPISBAIJ,
1797                                        MatTranspose_MPISBAIJ,
1798                                        /* 15*/ MatGetInfo_MPISBAIJ,
1799                                        MatEqual_MPISBAIJ,
1800                                        MatGetDiagonal_MPISBAIJ,
1801                                        MatDiagonalScale_MPISBAIJ,
1802                                        MatNorm_MPISBAIJ,
1803                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1804                                        MatAssemblyEnd_MPISBAIJ,
1805                                        MatSetOption_MPISBAIJ,
1806                                        MatZeroEntries_MPISBAIJ,
1807                                        /* 24*/ NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        /* 29*/ MatSetUp_MPI_Hash,
1813                                        NULL,
1814                                        NULL,
1815                                        MatGetDiagonalBlock_MPISBAIJ,
1816                                        NULL,
1817                                        /* 34*/ MatDuplicate_MPISBAIJ,
1818                                        NULL,
1819                                        NULL,
1820                                        NULL,
1821                                        NULL,
1822                                        /* 39*/ MatAXPY_MPISBAIJ,
1823                                        MatCreateSubMatrices_MPISBAIJ,
1824                                        MatIncreaseOverlap_MPISBAIJ,
1825                                        MatGetValues_MPISBAIJ,
1826                                        MatCopy_MPISBAIJ,
1827                                        /* 44*/ NULL,
1828                                        MatScale_MPISBAIJ,
1829                                        MatShift_MPISBAIJ,
1830                                        NULL,
1831                                        NULL,
1832                                        /* 49*/ NULL,
1833                                        NULL,
1834                                        NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        /* 54*/ NULL,
1838                                        NULL,
1839                                        MatSetUnfactored_MPISBAIJ,
1840                                        NULL,
1841                                        MatSetValuesBlocked_MPISBAIJ,
1842                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1843                                        NULL,
1844                                        NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        /* 64*/ NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        MatGetRowMaxAbs_MPISBAIJ,
1852                                        /* 69*/ NULL,
1853                                        MatConvert_MPISBAIJ_Basic,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        NULL,
1861                                        MatLoad_MPISBAIJ,
1862                                        /* 79*/ NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        /* 84*/ NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        /* 89*/ NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        MatConjugate_MPISBAIJ,
1877                                        /* 94*/ NULL,
1878                                        NULL,
1879                                        MatRealPart_MPISBAIJ,
1880                                        MatImaginaryPart_MPISBAIJ,
1881                                        MatGetRowUpperTriangular_MPISBAIJ,
1882                                        /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
1883                                        NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        /*104*/ MatMissingDiagonal_MPISBAIJ,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        /*109*/ NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        /*114*/ NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        /*119*/ NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        NULL,
1907                                        /*124*/ NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        MatSetBlockSizes_Default,
1911                                        NULL,
1912                                        /*129*/ NULL,
1913                                        NULL,
1914                                        MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1915                                        NULL,
1916                                        NULL,
1917                                        /*134*/ NULL,
1918                                        NULL,
1919                                        NULL,
1920                                        MatEliminateZeros_MPISBAIJ,
1921                                        NULL,
1922                                        /*139*/ NULL,
1923                                        NULL,
1924                                        NULL,
1925                                        MatCopyHashToXAIJ_MPI_Hash,
1926                                        NULL};
1927 
1928 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1929 {
1930   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1931   PetscInt      i, mbs, Mbs;
1932   PetscMPIInt   size;
1933 
1934   PetscFunctionBegin;
1935   if (B->hash_active) {
1936     B->ops[0]      = b->cops;
1937     B->hash_active = PETSC_FALSE;
1938   }
1939   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1940   PetscCall(MatSetBlockSize(B, bs));
1941   PetscCall(PetscLayoutSetUp(B->rmap));
1942   PetscCall(PetscLayoutSetUp(B->cmap));
1943   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1944   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1945   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1946 
1947   mbs = B->rmap->n / bs;
1948   Mbs = B->rmap->N / bs;
1949   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1950 
1951   B->rmap->bs = bs;
1952   b->bs2      = bs * bs;
1953   b->mbs      = mbs;
1954   b->Mbs      = Mbs;
1955   b->nbs      = B->cmap->n / bs;
1956   b->Nbs      = B->cmap->N / bs;
1957 
1958   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1959   b->rstartbs = B->rmap->rstart / bs;
1960   b->rendbs   = B->rmap->rend / bs;
1961 
1962   b->cstartbs = B->cmap->rstart / bs;
1963   b->cendbs   = B->cmap->rend / bs;
1964 
1965 #if defined(PETSC_USE_CTABLE)
1966   PetscCall(PetscHMapIDestroy(&b->colmap));
1967 #else
1968   PetscCall(PetscFree(b->colmap));
1969 #endif
1970   PetscCall(PetscFree(b->garray));
1971   PetscCall(VecDestroy(&b->lvec));
1972   PetscCall(VecScatterDestroy(&b->Mvctx));
1973   PetscCall(VecDestroy(&b->slvec0));
1974   PetscCall(VecDestroy(&b->slvec0b));
1975   PetscCall(VecDestroy(&b->slvec1));
1976   PetscCall(VecDestroy(&b->slvec1a));
1977   PetscCall(VecDestroy(&b->slvec1b));
1978   PetscCall(VecScatterDestroy(&b->sMvctx));
1979 
1980   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1981 
1982   MatSeqXAIJGetOptions_Private(b->B);
1983   PetscCall(MatDestroy(&b->B));
1984   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1985   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1986   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1987   MatSeqXAIJRestoreOptions_Private(b->B);
1988 
1989   MatSeqXAIJGetOptions_Private(b->A);
1990   PetscCall(MatDestroy(&b->A));
1991   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1992   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1993   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1994   MatSeqXAIJRestoreOptions_Private(b->A);
1995 
1996   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1997   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1998 
1999   B->preallocated  = PETSC_TRUE;
2000   B->was_assembled = PETSC_FALSE;
2001   B->assembled     = PETSC_FALSE;
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2006 {
2007   PetscInt        m, rstart, cend;
2008   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2009   const PetscInt *JJ          = NULL;
2010   PetscScalar    *values      = NULL;
2011   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2012   PetscBool       nooffprocentries;
2013 
2014   PetscFunctionBegin;
2015   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2016   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2017   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2018   PetscCall(PetscLayoutSetUp(B->rmap));
2019   PetscCall(PetscLayoutSetUp(B->cmap));
2020   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2021   m      = B->rmap->n / bs;
2022   rstart = B->rmap->rstart / bs;
2023   cend   = B->cmap->rend / bs;
2024 
2025   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2026   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2027   for (i = 0; i < m; i++) {
2028     nz = ii[i + 1] - ii[i];
2029     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2030     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2031     JJ = jj + ii[i];
2032     bd = 0;
2033     for (j = 0; j < nz; j++) {
2034       if (*JJ >= i + rstart) break;
2035       JJ++;
2036       bd++;
2037     }
2038     d = 0;
2039     for (; j < nz; j++) {
2040       if (*JJ++ >= cend) break;
2041       d++;
2042     }
2043     d_nnz[i] = d;
2044     o_nnz[i] = nz - d - bd;
2045     nz       = nz - bd;
2046     nz_max   = PetscMax(nz_max, nz);
2047   }
2048   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2049   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2050   PetscCall(PetscFree2(d_nnz, o_nnz));
2051 
2052   values = (PetscScalar *)V;
2053   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2054   for (i = 0; i < m; i++) {
2055     PetscInt        row   = i + rstart;
2056     PetscInt        ncols = ii[i + 1] - ii[i];
2057     const PetscInt *icols = jj + ii[i];
2058     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2059       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2060       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2061     } else { /* block ordering does not match so we can only insert one block at a time. */
2062       PetscInt j;
2063       for (j = 0; j < ncols; j++) {
2064         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2065         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2066       }
2067     }
2068   }
2069 
2070   if (!V) PetscCall(PetscFree(values));
2071   nooffprocentries    = B->nooffprocentries;
2072   B->nooffprocentries = PETSC_TRUE;
2073   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2074   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2075   B->nooffprocentries = nooffprocentries;
2076 
2077   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /*MC
2082    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2083    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2084    the matrix is stored.
2085 
2086    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2087    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2088 
2089    Options Database Key:
2090 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2091 
2092    Level: beginner
2093 
2094    Note:
2095      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2096      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2097 
2098 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2099 M*/
2100 
2101 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2102 {
2103   Mat_MPISBAIJ *b;
2104   PetscBool     flg = PETSC_FALSE;
2105 
2106   PetscFunctionBegin;
2107   PetscCall(PetscNew(&b));
2108   B->data   = (void *)b;
2109   B->ops[0] = MatOps_Values;
2110 
2111   B->ops->destroy = MatDestroy_MPISBAIJ;
2112   B->ops->view    = MatView_MPISBAIJ;
2113   B->assembled    = PETSC_FALSE;
2114   B->insertmode   = NOT_SET_VALUES;
2115 
2116   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2117   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2118 
2119   /* build local table of row and column ownerships */
2120   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2121 
2122   /* build cache for off array entries formed */
2123   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2124 
2125   b->donotstash  = PETSC_FALSE;
2126   b->colmap      = NULL;
2127   b->garray      = NULL;
2128   b->roworiented = PETSC_TRUE;
2129 
2130   /* stuff used in block assembly */
2131   b->barray = NULL;
2132 
2133   /* stuff used for matrix vector multiply */
2134   b->lvec    = NULL;
2135   b->Mvctx   = NULL;
2136   b->slvec0  = NULL;
2137   b->slvec0b = NULL;
2138   b->slvec1  = NULL;
2139   b->slvec1a = NULL;
2140   b->slvec1b = NULL;
2141   b->sMvctx  = NULL;
2142 
2143   /* stuff for MatGetRow() */
2144   b->rowindices   = NULL;
2145   b->rowvalues    = NULL;
2146   b->getrowactive = PETSC_FALSE;
2147 
2148   /* hash table stuff */
2149   b->ht           = NULL;
2150   b->hd           = NULL;
2151   b->ht_size      = 0;
2152   b->ht_flag      = PETSC_FALSE;
2153   b->ht_fact      = 0;
2154   b->ht_total_ct  = 0;
2155   b->ht_insert_ct = 0;
2156 
2157   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2158   b->ijonly = PETSC_FALSE;
2159 
2160   b->in_loc = NULL;
2161   b->v_loc  = NULL;
2162   b->n_loc  = 0;
2163 
2164   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2165   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2166   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2167   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2168 #if defined(PETSC_HAVE_ELEMENTAL)
2169   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2170 #endif
2171 #if defined(PETSC_HAVE_SCALAPACK)
2172   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2173 #endif
2174   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2175   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2176 
2177   B->symmetric                   = PETSC_BOOL3_TRUE;
2178   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2179   B->symmetry_eternal            = PETSC_TRUE;
2180   B->structural_symmetry_eternal = PETSC_TRUE;
2181 #if defined(PETSC_USE_COMPLEX)
2182   B->hermitian = PETSC_BOOL3_FALSE;
2183 #else
2184   B->hermitian = PETSC_BOOL3_TRUE;
2185 #endif
2186 
2187   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2188   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2189   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2190   if (flg) {
2191     PetscReal fact = 1.39;
2192     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2193     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2194     if (fact <= 1.0) fact = 1.39;
2195     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2196     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2197   }
2198   PetscOptionsEnd();
2199   PetscFunctionReturn(PETSC_SUCCESS);
2200 }
2201 
2202 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2203 /*MC
2204    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2205 
2206    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2207    and `MATMPISBAIJ` otherwise.
2208 
2209    Options Database Key:
2210 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2211 
2212   Level: beginner
2213 
2214 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2215 M*/
2216 
2217 /*@
2218   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2219   the user should preallocate the matrix storage by setting the parameters
2220   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2221   performance can be increased by more than a factor of 50.
2222 
2223   Collective
2224 
2225   Input Parameters:
2226 + B     - the matrix
2227 . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2228           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2229 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2230           submatrix  (same for all local rows)
2231 . d_nnz - array containing the number of block nonzeros in the various block rows
2232           in the upper triangular and diagonal part of the in diagonal portion of the local
2233           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2234           for the diagonal entry and set a value even if it is zero.
2235 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2236           submatrix (same for all local rows).
2237 - o_nnz - array containing the number of nonzeros in the various block rows of the
2238           off-diagonal portion of the local submatrix that is right of the diagonal
2239           (possibly different for each block row) or `NULL`.
2240 
2241   Options Database Keys:
2242 + -mat_no_unroll  - uses code that does not unroll the loops in the
2243                     block calculations (much slower)
2244 - -mat_block_size - size of the blocks to use
2245 
2246   Level: intermediate
2247 
2248   Notes:
2249 
2250   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2251   than it must be used on all processors that share the object for that argument.
2252 
2253   If the *_nnz parameter is given then the *_nz parameter is ignored
2254 
2255   Storage Information:
2256   For a square global matrix we define each processor's diagonal portion
2257   to be its local rows and the corresponding columns (a square submatrix);
2258   each processor's off-diagonal portion encompasses the remainder of the
2259   local matrix (a rectangular submatrix).
2260 
2261   The user can specify preallocated storage for the diagonal part of
2262   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2263   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2264   memory allocation.  Likewise, specify preallocated storage for the
2265   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2266 
2267   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2268   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2269   You can also run with the option `-info` and look for messages with the string
2270   malloc in them to see if additional memory allocation was needed.
2271 
2272   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2273   the figure below we depict these three local rows and all columns (0-11).
2274 
2275 .vb
2276            0 1 2 3 4 5 6 7 8 9 10 11
2277           --------------------------
2278    row 3  |. . . d d d o o o o  o  o
2279    row 4  |. . . d d d o o o o  o  o
2280    row 5  |. . . d d d o o o o  o  o
2281           --------------------------
2282 .ve
2283 
2284   Thus, any entries in the d locations are stored in the d (diagonal)
2285   submatrix, and any entries in the o locations are stored in the
2286   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2287   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2288 
2289   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2290   plus the diagonal part of the d matrix,
2291   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2292 
2293   In general, for PDE problems in which most nonzeros are near the diagonal,
2294   one expects `d_nz` >> `o_nz`.
2295 
2296 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2297 @*/
2298 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2299 {
2300   PetscFunctionBegin;
2301   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2302   PetscValidType(B, 1);
2303   PetscValidLogicalCollectiveInt(B, bs, 2);
2304   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2305   PetscFunctionReturn(PETSC_SUCCESS);
2306 }
2307 
2308 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2309 /*@
2310   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2311   (block compressed row).  For good matrix assembly performance
2312   the user should preallocate the matrix storage by setting the parameters
2313   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2314 
2315   Collective
2316 
2317   Input Parameters:
2318 + comm  - MPI communicator
2319 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2320           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2321 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2322           This value should be the same as the local size used in creating the
2323           y vector for the matrix-vector product y = Ax.
2324 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2325           This value should be the same as the local size used in creating the
2326           x vector for the matrix-vector product y = Ax.
2327 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2328 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2329 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2330           submatrix (same for all local rows)
2331 . d_nnz - array containing the number of block nonzeros in the various block rows
2332           in the upper triangular portion of the in diagonal portion of the local
2333           (possibly different for each block block row) or `NULL`.
2334           If you plan to factor the matrix you must leave room for the diagonal entry and
2335           set its value even if it is zero.
2336 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2337           submatrix (same for all local rows).
2338 - o_nnz - array containing the number of nonzeros in the various block rows of the
2339           off-diagonal portion of the local submatrix (possibly different for
2340           each block row) or `NULL`.
2341 
2342   Output Parameter:
2343 . A - the matrix
2344 
2345   Options Database Keys:
2346 + -mat_no_unroll  - uses code that does not unroll the loops in the
2347                     block calculations (much slower)
2348 . -mat_block_size - size of the blocks to use
2349 - -mat_mpi        - use the parallel matrix data structures even on one processor
2350                     (defaults to using SeqBAIJ format on one processor)
2351 
2352   Level: intermediate
2353 
2354   Notes:
2355   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2356   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2357   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2358 
2359   The number of rows and columns must be divisible by blocksize.
2360   This matrix type does not support complex Hermitian operation.
2361 
2362   The user MUST specify either the local or global matrix dimensions
2363   (possibly both).
2364 
2365   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2366   than it must be used on all processors that share the object for that argument.
2367 
2368   If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by
2369   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2370 
2371   If the *_nnz parameter is given then the *_nz parameter is ignored
2372 
2373   Storage Information:
2374   For a square global matrix we define each processor's diagonal portion
2375   to be its local rows and the corresponding columns (a square submatrix);
2376   each processor's off-diagonal portion encompasses the remainder of the
2377   local matrix (a rectangular submatrix).
2378 
2379   The user can specify preallocated storage for the diagonal part of
2380   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2381   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2382   memory allocation. Likewise, specify preallocated storage for the
2383   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2384 
2385   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2386   the figure below we depict these three local rows and all columns (0-11).
2387 
2388 .vb
2389            0 1 2 3 4 5 6 7 8 9 10 11
2390           --------------------------
2391    row 3  |. . . d d d o o o o  o  o
2392    row 4  |. . . d d d o o o o  o  o
2393    row 5  |. . . d d d o o o o  o  o
2394           --------------------------
2395 .ve
2396 
2397   Thus, any entries in the d locations are stored in the d (diagonal)
2398   submatrix, and any entries in the o locations are stored in the
2399   o (off-diagonal) submatrix. Note that the d matrix is stored in
2400   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2401 
2402   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2403   plus the diagonal part of the d matrix,
2404   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2405   In general, for PDE problems in which most nonzeros are near the diagonal,
2406   one expects `d_nz` >> `o_nz`.
2407 
2408 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2409           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2410 @*/
2411 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2412 {
2413   PetscMPIInt size;
2414 
2415   PetscFunctionBegin;
2416   PetscCall(MatCreate(comm, A));
2417   PetscCall(MatSetSizes(*A, m, n, M, N));
2418   PetscCallMPI(MPI_Comm_size(comm, &size));
2419   if (size > 1) {
2420     PetscCall(MatSetType(*A, MATMPISBAIJ));
2421     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2422   } else {
2423     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2424     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2425   }
2426   PetscFunctionReturn(PETSC_SUCCESS);
2427 }
2428 
2429 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2430 {
2431   Mat           mat;
2432   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2433   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2434   PetscScalar  *array;
2435 
2436   PetscFunctionBegin;
2437   *newmat = NULL;
2438 
2439   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2440   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2441   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2442   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2443   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2444 
2445   if (matin->hash_active) {
2446     PetscCall(MatSetUp(mat));
2447   } else {
2448     mat->factortype   = matin->factortype;
2449     mat->preallocated = PETSC_TRUE;
2450     mat->assembled    = PETSC_TRUE;
2451     mat->insertmode   = NOT_SET_VALUES;
2452 
2453     a      = (Mat_MPISBAIJ *)mat->data;
2454     a->bs2 = oldmat->bs2;
2455     a->mbs = oldmat->mbs;
2456     a->nbs = oldmat->nbs;
2457     a->Mbs = oldmat->Mbs;
2458     a->Nbs = oldmat->Nbs;
2459 
2460     a->size         = oldmat->size;
2461     a->rank         = oldmat->rank;
2462     a->donotstash   = oldmat->donotstash;
2463     a->roworiented  = oldmat->roworiented;
2464     a->rowindices   = NULL;
2465     a->rowvalues    = NULL;
2466     a->getrowactive = PETSC_FALSE;
2467     a->barray       = NULL;
2468     a->rstartbs     = oldmat->rstartbs;
2469     a->rendbs       = oldmat->rendbs;
2470     a->cstartbs     = oldmat->cstartbs;
2471     a->cendbs       = oldmat->cendbs;
2472 
2473     /* hash table stuff */
2474     a->ht           = NULL;
2475     a->hd           = NULL;
2476     a->ht_size      = 0;
2477     a->ht_flag      = oldmat->ht_flag;
2478     a->ht_fact      = oldmat->ht_fact;
2479     a->ht_total_ct  = 0;
2480     a->ht_insert_ct = 0;
2481 
2482     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2483     if (oldmat->colmap) {
2484 #if defined(PETSC_USE_CTABLE)
2485       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2486 #else
2487       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2488       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2489 #endif
2490     } else a->colmap = NULL;
2491 
2492     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2493       PetscCall(PetscMalloc1(len, &a->garray));
2494       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2495     } else a->garray = NULL;
2496 
2497     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2498     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2499     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2500 
2501     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2502     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2503 
2504     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2505     PetscCall(VecGetArray(a->slvec1, &array));
2506     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2507     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2508     PetscCall(VecRestoreArray(a->slvec1, &array));
2509     PetscCall(VecGetArray(a->slvec0, &array));
2510     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2511     PetscCall(VecRestoreArray(a->slvec0, &array));
2512 
2513     /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2514     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2515     a->sMvctx = oldmat->sMvctx;
2516 
2517     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2518     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2519   }
2520   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2521   *newmat = mat;
2522   PetscFunctionReturn(PETSC_SUCCESS);
2523 }
2524 
2525 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2526 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2527 
2528 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2529 {
2530   PetscBool isbinary;
2531 
2532   PetscFunctionBegin;
2533   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2534   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2535   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2536   PetscFunctionReturn(PETSC_SUCCESS);
2537 }
2538 
2539 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2540 {
2541   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2542   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2543   PetscReal     atmp;
2544   PetscReal    *work, *svalues, *rvalues;
2545   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2546   PetscMPIInt   rank, size;
2547   PetscInt     *rowners_bs, count, source;
2548   PetscScalar  *va;
2549   MatScalar    *ba;
2550   MPI_Status    stat;
2551 
2552   PetscFunctionBegin;
2553   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2554   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2555   PetscCall(VecGetArray(v, &va));
2556 
2557   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2558   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2559 
2560   bs  = A->rmap->bs;
2561   mbs = a->mbs;
2562   Mbs = a->Mbs;
2563   ba  = b->a;
2564   bi  = b->i;
2565   bj  = b->j;
2566 
2567   /* find ownerships */
2568   rowners_bs = A->rmap->range;
2569 
2570   /* each proc creates an array to be distributed */
2571   PetscCall(PetscCalloc1(bs * Mbs, &work));
2572 
2573   /* row_max for B */
2574   if (rank != size - 1) {
2575     for (i = 0; i < mbs; i++) {
2576       ncols = bi[1] - bi[0];
2577       bi++;
2578       brow = bs * i;
2579       for (j = 0; j < ncols; j++) {
2580         bcol = bs * (*bj);
2581         for (kcol = 0; kcol < bs; kcol++) {
2582           col = bcol + kcol;           /* local col index */
2583           col += rowners_bs[rank + 1]; /* global col index */
2584           for (krow = 0; krow < bs; krow++) {
2585             atmp = PetscAbsScalar(*ba);
2586             ba++;
2587             row = brow + krow; /* local row index */
2588             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2589             if (work[col] < atmp) work[col] = atmp;
2590           }
2591         }
2592         bj++;
2593       }
2594     }
2595 
2596     /* send values to its owners */
2597     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2598       svalues = work + rowners_bs[dest];
2599       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2600       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2601     }
2602   }
2603 
2604   /* receive values */
2605   if (rank) {
2606     rvalues = work;
2607     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2608     for (source = 0; source < rank; source++) {
2609       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2610       /* process values */
2611       for (i = 0; i < count; i++) {
2612         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2613       }
2614     }
2615   }
2616 
2617   PetscCall(VecRestoreArray(v, &va));
2618   PetscCall(PetscFree(work));
2619   PetscFunctionReturn(PETSC_SUCCESS);
2620 }
2621 
2622 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2623 {
2624   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2625   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2626   PetscScalar       *x, *ptr, *from;
2627   Vec                bb1;
2628   const PetscScalar *b;
2629 
2630   PetscFunctionBegin;
2631   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2632   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2633 
2634   if (flag == SOR_APPLY_UPPER) {
2635     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2636     PetscFunctionReturn(PETSC_SUCCESS);
2637   }
2638 
2639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2640     if (flag & SOR_ZERO_INITIAL_GUESS) {
2641       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2642       its--;
2643     }
2644 
2645     PetscCall(VecDuplicate(bb, &bb1));
2646     while (its--) {
2647       /* lower triangular part: slvec0b = - B^T*xx */
2648       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2649 
2650       /* copy xx into slvec0a */
2651       PetscCall(VecGetArray(mat->slvec0, &ptr));
2652       PetscCall(VecGetArray(xx, &x));
2653       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2654       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2655 
2656       PetscCall(VecScale(mat->slvec0, -1.0));
2657 
2658       /* copy bb into slvec1a */
2659       PetscCall(VecGetArray(mat->slvec1, &ptr));
2660       PetscCall(VecGetArrayRead(bb, &b));
2661       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2662       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2663 
2664       /* set slvec1b = 0 */
2665       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2666       PetscCall(VecZeroEntries(mat->slvec1b));
2667 
2668       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2669       PetscCall(VecRestoreArray(xx, &x));
2670       PetscCall(VecRestoreArrayRead(bb, &b));
2671       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2672 
2673       /* upper triangular part: bb1 = bb1 - B*x */
2674       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2675 
2676       /* local diagonal sweep */
2677       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2678     }
2679     PetscCall(VecDestroy(&bb1));
2680   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2681     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2682   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2683     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2684   } else if (flag & SOR_EISENSTAT) {
2685     Vec                xx1;
2686     PetscBool          hasop;
2687     const PetscScalar *diag;
2688     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2689     PetscInt           i, n;
2690 
2691     if (!mat->xx1) {
2692       PetscCall(VecDuplicate(bb, &mat->xx1));
2693       PetscCall(VecDuplicate(bb, &mat->bb1));
2694     }
2695     xx1 = mat->xx1;
2696     bb1 = mat->bb1;
2697 
2698     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2699 
2700     if (!mat->diag) {
2701       /* this is wrong for same matrix with new nonzero values */
2702       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2703       PetscCall(MatGetDiagonal(matin, mat->diag));
2704     }
2705     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2706 
2707     if (hasop) {
2708       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2709       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2710     } else {
2711       /*
2712           These two lines are replaced by code that may be a bit faster for a good compiler
2713       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2714       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2715       */
2716       PetscCall(VecGetArray(mat->slvec1a, &sl));
2717       PetscCall(VecGetArrayRead(mat->diag, &diag));
2718       PetscCall(VecGetArrayRead(bb, &b));
2719       PetscCall(VecGetArray(xx, &x));
2720       PetscCall(VecGetLocalSize(xx, &n));
2721       if (omega == 1.0) {
2722         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2723         PetscCall(PetscLogFlops(2.0 * n));
2724       } else {
2725         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2726         PetscCall(PetscLogFlops(3.0 * n));
2727       }
2728       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2729       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2730       PetscCall(VecRestoreArrayRead(bb, &b));
2731       PetscCall(VecRestoreArray(xx, &x));
2732     }
2733 
2734     /* multiply off-diagonal portion of matrix */
2735     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2736     PetscCall(VecZeroEntries(mat->slvec1b));
2737     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2738     PetscCall(VecGetArray(mat->slvec0, &from));
2739     PetscCall(VecGetArray(xx, &x));
2740     PetscCall(PetscArraycpy(from, x, bs * mbs));
2741     PetscCall(VecRestoreArray(mat->slvec0, &from));
2742     PetscCall(VecRestoreArray(xx, &x));
2743     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2744     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2745     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2746 
2747     /* local sweep */
2748     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2749     PetscCall(VecAXPY(xx, 1.0, xx1));
2750   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2751   PetscFunctionReturn(PETSC_SUCCESS);
2752 }
2753 
2754 /*@
2755   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows.
2756 
2757   Collective
2758 
2759   Input Parameters:
2760 + comm - MPI communicator
2761 . bs   - the block size, only a block size of 1 is supported
2762 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2763 . n    - This value should be the same as the local size used in creating the
2764          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
2765          calculated if `N` is given) For square matrices `n` is almost always `m`.
2766 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2767 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2768 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2769 . j    - column indices
2770 - a    - matrix values
2771 
2772   Output Parameter:
2773 . mat - the matrix
2774 
2775   Level: intermediate
2776 
2777   Notes:
2778   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2779   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2780   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2781 
2782   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2783 
2784 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2785           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2786 @*/
2787 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2788 {
2789   PetscFunctionBegin;
2790   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2791   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2792   PetscCall(MatCreate(comm, mat));
2793   PetscCall(MatSetSizes(*mat, m, n, M, N));
2794   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2795   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2796   PetscFunctionReturn(PETSC_SUCCESS);
2797 }
2798 
2799 /*@
2800   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2801 
2802   Collective
2803 
2804   Input Parameters:
2805 + B  - the matrix
2806 . bs - the block size
2807 . i  - the indices into `j` for the start of each local row (indices start with zero)
2808 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2809 - v  - optional values in the matrix, pass `NULL` if not provided
2810 
2811   Level: advanced
2812 
2813   Notes:
2814   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2815   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2816   called this routine.
2817 
2818   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2819   and usually the numerical values as well
2820 
2821   Any entries passed in that are below the diagonal are ignored
2822 
2823 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2824           `MatCreateMPISBAIJWithArrays()`
2825 @*/
2826 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2827 {
2828   PetscFunctionBegin;
2829   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2830   PetscFunctionReturn(PETSC_SUCCESS);
2831 }
2832 
2833 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2834 {
2835   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2836   PetscInt    *indx;
2837   PetscScalar *values;
2838 
2839   PetscFunctionBegin;
2840   PetscCall(MatGetSize(inmat, &m, &N));
2841   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2842     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2843     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2844     PetscInt     *bindx, rmax = a->rmax, j;
2845     PetscMPIInt   rank, size;
2846 
2847     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2848     mbs = m / bs;
2849     Nbs = N / cbs;
2850     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2851     nbs = n / cbs;
2852 
2853     PetscCall(PetscMalloc1(rmax, &bindx));
2854     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2855 
2856     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2857     PetscCallMPI(MPI_Comm_rank(comm, &size));
2858     if (rank == size - 1) {
2859       /* Check sum(nbs) = Nbs */
2860       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2861     }
2862 
2863     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2864     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2865     for (i = 0; i < mbs; i++) {
2866       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2867       nnz = nnz / bs;
2868       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2869       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2870       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2871     }
2872     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2873     PetscCall(PetscFree(bindx));
2874 
2875     PetscCall(MatCreate(comm, outmat));
2876     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2877     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2878     PetscCall(MatSetType(*outmat, MATSBAIJ));
2879     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2880     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2881     MatPreallocateEnd(dnz, onz);
2882   }
2883 
2884   /* numeric phase */
2885   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2886   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2887 
2888   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2889   for (i = 0; i < m; i++) {
2890     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2891     Ii = i + rstart;
2892     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2893     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2894   }
2895   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2896   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2897   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2898   PetscFunctionReturn(PETSC_SUCCESS);
2899 }
2900