/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a289a2f2dff3362f6bfc0c874680f4e4d851e010) */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
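/*
   Sets the preallocation of B from the union of the nonzero patterns of the
   nm matrices X[], all of which must share B's row layout: a MATPREALLOCATOR
   matrix absorbs every row of every X[i] (symmetrized when symm[i] is true)
   and then transfers the counts to B via MatPreallocatorPreallocate(), which
   with fill == PETSC_TRUE also inserts explicit zeros at those locations.
*/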
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

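/*
   Fallback conversion out of the symmetric format: because SBAIJ stores only
   the upper triangle, every locally owned row is copied into B twice, once as
   a row and once transposed into the corresponding column (conjugated when
   the matrix is marked Hermitian), so B ends up holding the full matrix.
   Typically reached through MatConvert(), e.g.
   MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B).
*/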
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

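/*
   The two macros below insert a single scalar into the local diagonal block
   (A, SBAIJ) or off-diagonal block (B, BAIJ): a coarse binary search narrows
   the block row to a few candidate block columns, a linear scan then finds
   (or, if a->nonew/b->nonew permits, creates after reallocating) the bs x bs
   block, and the scalar lands at column-major offset bs*cidx + ridx inside it.
*/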
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

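/* Identical insertion logic to MatSetValues_SeqSBAIJ_A_Private() above, but operating on the off-diagonal BAIJ block B */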
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (in block indices).
   Any a(i,j) with i>j supplied by the user is either ignored or generates an error
*/
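/* Locally owned entries are inserted directly into A or B through the macros
   above; entries destined for off-process rows keep only their upper
   triangular part, which is packed into in_loc/v_loc and stashed for
   communication during assembly */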
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

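/*
   Inserts one dense bs x bs block at block row "row", block column "col" of
   the local SeqSBAIJ diagonal part; blocks with col < row lie in the unstored
   lower triangle and are ignored (or flagged as an error). Row-oriented input
   is transposed on the fly into the column-major block storage.
*/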
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
    PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        /* If only one block column (row-oriented) or one block row (column-oriented) is input, the block can be used in place and no copy is required */
        if (roworiented && n == 1) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

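/* Only values in locally owned rows may be queried; columns falling in the
   off-diagonal part are translated through colmap/garray, and requested
   entries that are not stored locally are returned as 0.0 */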
static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

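/*
   For the Frobenius norm each process squares the norms of its A and B parts
   and the result is sqrt(sum ||A||^2 + 2*sum ||B||^2); the factor 2 accounts
   for the unstored transposes of the off-diagonal blocks. The 1- and
   infinity-norms coincide for a symmetric matrix, so each stored block
   contributes to both its row sum and its column sum before a global
   reduction takes the maximum.
*/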
static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2); /* lnorm2[0] = ||A||_F^2 */
      lnorm2++;
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2); /* lnorm2[1] = ||B||_F^2 */
      lnorm2--;
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

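/*
   Drains both stashes, replaying scalar entries through MatSetValues_MPISBAIJ()
   and blocked (column-oriented) entries through MatSetValuesBlocked_MPISBAIJ(),
   then assembles the local A and B parts; on the first final assembly the
   scatter contexts needed by MatMult() are also built.
*/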
static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     all_assembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since its values are stashed column-oriented,
       temporarily clear the row-oriented flags and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if new nonzero locations are not allowed, the nonzero state can change only on the first final assembly; otherwise recompute it on every assembly */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

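/* Hermitian variant of MatMult_MPISBAIJ() below: the contribution of the
   unstored lower triangle is formed with B^H rather than B^T */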
#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

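/*
   Computes y = (A + B + B^T)*x from the stored upper blocks: slvec1a first
   receives the diagonal product A*x; slvec0 packs the local part of x
   followed by B^T*x; a single ADD_VALUES scatter (sMvctx) then both delivers
   the B^T contributions to their owning rows (accumulated into slvec1a) and
   gathers the ghost values of x into slvec1b; finally y = B*slvec1b + slvec1a.
*/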
static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

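/* Returns one locally owned row with global column indices and values, merged
   in increasing column order from the diagonal part A and the off-diagonal
   part B, whose block columns are mapped back to global indices through
   garray */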
static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
1326 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1327 {
1328   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1329   const PetscInt *ptr1, *ptr2;
1330 
1331   PetscFunctionBegin;
1332   *flg = PETSC_FALSE;
1333   PetscCall(ISGetLocalSize(isrow, &sz1));
1334   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1335   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1336 
1337   PetscCall(ISGetIndices(isrow, &ptr1));
1338   PetscCall(ISGetIndices(iscol_local, &ptr2));
1339 
1340   PetscCall(PetscMalloc1(sz1, &a1));
1341   PetscCall(PetscMalloc1(sz2, &a2));
1342   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1343   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1344   PetscCall(PetscSortInt(sz1, a1));
1345   PetscCall(PetscSortInt(sz2, a2));
1346 
1347   nmatch = 0;
1348   k      = 0;
1349   for (i = 0; i < sz1; i++) {
1350     for (j = k; j < sz2; j++) {
1351       if (a1[i] == a2[j]) {
1352         k = j;
1353         nmatch++;
1354         break;
1355       }
1356     }
1357   }
1358   PetscCall(ISRestoreIndices(isrow, &ptr1));
1359   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1360   PetscCall(PetscFree(a1));
1361   PetscCall(PetscFree(a2));
1362   *flg = (PetscBool)(nmatch == sz1); /* isrow is a subset iff every one of its indices matched */
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1371 {
1372   Mat       C[2];
1373   IS        iscol_local, isrow_local;
1374   PetscInt  csize, csize_local, rsize;
1375   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1376 
1377   PetscFunctionBegin;
1378   PetscCall(ISGetLocalSize(iscol, &csize));
1379   PetscCall(ISGetLocalSize(isrow, &rsize));
1380   if (call == MAT_REUSE_MATRIX) {
1381     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1382     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1383   } else {
1384     PetscCall(ISAllGather(iscol, &iscol_local));
1385     PetscCall(ISSorted(iscol_local, &issorted));
1386     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1387   }
1388   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1389   if (!isequal) {
1390     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1391     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1392     if (!isidentity) {
1393       if (call == MAT_REUSE_MATRIX) {
1394         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1395         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1396       } else {
1397         PetscCall(ISAllGather(isrow, &isrow_local));
1398         PetscCall(ISSorted(isrow_local, &issorted));
1399         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1400       }
1401     }
1402   }
1403   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1404   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1405   if (!isequal && !isidentity) {
1406     if (call == MAT_INITIAL_MATRIX) {
1407       IS       intersect;
1408       PetscInt ni;
1409 
1410       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1411       PetscCall(ISGetLocalSize(intersect, &ni));
1412       PetscCall(ISDestroy(&intersect));
1413       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1414     }
1415     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1416     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1417     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1418     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1419     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1420     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1421     PetscCall(MatDestroy(C));
1422     PetscCall(MatDestroy(C + 1));
1423   }
1424   if (call == MAT_INITIAL_MATRIX) {
1425     if (!isequal && !isidentity) {
1426       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1427       PetscCall(ISDestroy(&isrow_local));
1428     }
1429     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1430     PetscCall(ISDestroy(&iscol_local));
1431   }
1432   PetscFunctionReturn(PETSC_SUCCESS);
1433 }
1434 
1435 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1436 {
1437   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1438 
1439   PetscFunctionBegin;
1440   PetscCall(MatZeroEntries(l->A));
1441   PetscCall(MatZeroEntries(l->B));
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 }
1444 
1445 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1446 {
1447   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1448   Mat            A = a->A, B = a->B;
1449   PetscLogDouble isend[5], irecv[5];
1450 
1451   PetscFunctionBegin;
1452   info->block_size = (PetscReal)matin->rmap->bs;
1453 
1454   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1455 
1456   isend[0] = info->nz_used;
1457   isend[1] = info->nz_allocated;
1458   isend[2] = info->nz_unneeded;
1459   isend[3] = info->memory;
1460   isend[4] = info->mallocs;
1461 
1462   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1463 
1464   isend[0] += info->nz_used;
1465   isend[1] += info->nz_allocated;
1466   isend[2] += info->nz_unneeded;
1467   isend[3] += info->memory;
1468   isend[4] += info->mallocs;
1469   if (flag == MAT_LOCAL) {
1470     info->nz_used      = isend[0];
1471     info->nz_allocated = isend[1];
1472     info->nz_unneeded  = isend[2];
1473     info->memory       = isend[3];
1474     info->mallocs      = isend[4];
1475   } else if (flag == MAT_GLOBAL_MAX) {
1476     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1477 
1478     info->nz_used      = irecv[0];
1479     info->nz_allocated = irecv[1];
1480     info->nz_unneeded  = irecv[2];
1481     info->memory       = irecv[3];
1482     info->mallocs      = irecv[4];
1483   } else if (flag == MAT_GLOBAL_SUM) {
1484     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1485 
1486     info->nz_used      = irecv[0];
1487     info->nz_allocated = irecv[1];
1488     info->nz_unneeded  = irecv[2];
1489     info->memory       = irecv[3];
1490     info->mallocs      = irecv[4];
1491   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1492   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1493   info->fill_ratio_needed = 0;
1494   info->factor_mallocs    = 0;
1495   PetscFunctionReturn(PETSC_SUCCESS);
1496 }
1497 
1498 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1499 {
1500   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1501   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1502 
1503   PetscFunctionBegin;
1504   switch (op) {
1505   case MAT_NEW_NONZERO_LOCATIONS:
1506   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1507   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1508   case MAT_KEEP_NONZERO_PATTERN:
1509   case MAT_NEW_NONZERO_LOCATION_ERR:
1510     MatCheckPreallocated(A, 1);
1511     PetscCall(MatSetOption(a->A, op, flg));
1512     PetscCall(MatSetOption(a->B, op, flg));
1513     break;
1514   case MAT_ROW_ORIENTED:
1515     MatCheckPreallocated(A, 1);
1516     a->roworiented = flg;
1517 
1518     PetscCall(MatSetOption(a->A, op, flg));
1519     PetscCall(MatSetOption(a->B, op, flg));
1520     break;
1521   case MAT_IGNORE_OFF_PROC_ENTRIES:
1522     a->donotstash = flg;
1523     break;
1524   case MAT_USE_HASH_TABLE:
1525     a->ht_flag = flg;
1526     break;
1527   case MAT_HERMITIAN:
1528     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1529 #if defined(PETSC_USE_COMPLEX)
1530     if (flg) { /* need different mat-vec ops */
1531       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1532       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1533       A->ops->multtranspose    = NULL;
1534       A->ops->multtransposeadd = NULL;
1535       A->symmetric             = PETSC_BOOL3_FALSE;
1536     }
1537 #endif
1538     break;
1539   case MAT_SPD:
1540   case MAT_SYMMETRIC:
1541     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1542 #if defined(PETSC_USE_COMPLEX)
1543     if (flg) { /* restore to use default mat-vec ops */
1544       A->ops->mult             = MatMult_MPISBAIJ;
1545       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1546       A->ops->multtranspose    = MatMult_MPISBAIJ;
1547       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1548     }
1549 #endif
1550     break;
1551   case MAT_STRUCTURALLY_SYMMETRIC:
1552     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1553     break;
1554   case MAT_IGNORE_LOWER_TRIANGULAR:
1555   case MAT_ERROR_LOWER_TRIANGULAR:
1556     aA->ignore_ltriangular = flg;
1557     break;
1558   case MAT_GETROW_UPPERTRIANGULAR:
1559     aA->getrow_utriangular = flg;
1560     break;
1561   default:
1562     break;
1563   }
1564   PetscFunctionReturn(PETSC_SUCCESS);
1565 }
1566 
1567 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1568 {
1569   PetscFunctionBegin;
1570   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1571   if (reuse == MAT_INITIAL_MATRIX) {
1572     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1573   } else if (reuse == MAT_REUSE_MATRIX) {
1574     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1575   }
1576   PetscFunctionReturn(PETSC_SUCCESS);
1577 }
1578 
1579 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1580 {
1581   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1582   Mat           a = baij->A, b = baij->B;
1583   PetscInt      nv, m, n;
1584   PetscBool     flg;
1585 
1586   PetscFunctionBegin;
1587   if (ll != rr) {
1588     PetscCall(VecEqual(ll, rr, &flg));
1589     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be the same");
1590   }
1591   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1592 
1593   PetscCall(MatGetLocalSize(mat, &m, &n));
1594   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1595 
1596   PetscCall(VecGetLocalSize(rr, &nv));
1597   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vectors have non-conforming local size");
1598 
1599   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1600 
1601   /* left diagonalscale the off-diagonal part */
1602   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1603 
1604   /* scale the diagonal part */
1605   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1606 
1607   /* right diagonalscale the off-diagonal part */
1608   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1609   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1614 {
1615   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1616 
1617   PetscFunctionBegin;
1618   PetscCall(MatSetUnfactored(a->A));
1619   PetscFunctionReturn(PETSC_SUCCESS);
1620 }
1621 
1622 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1623 
1624 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1625 {
1626   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1627   Mat           a, b, c, d;
1628   PetscBool     flg;
1629 
1630   PetscFunctionBegin;
1631   a = matA->A;
1632   b = matA->B;
1633   c = matB->A;
1634   d = matB->B;
1635 
1636   PetscCall(MatEqual(a, c, &flg));
1637   if (flg) PetscCall(MatEqual(b, d, &flg));
1638   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1639   PetscFunctionReturn(PETSC_SUCCESS);
1640 }
1641 
1642 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1643 {
1644   PetscBool isbaij;
1645 
1646   PetscFunctionBegin;
1647   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1648   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1649   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1650   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1651     PetscCall(MatGetRowUpperTriangular(A));
1652     PetscCall(MatCopy_Basic(A, B, str));
1653     PetscCall(MatRestoreRowUpperTriangular(A));
1654   } else {
1655     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1656     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1657 
1658     PetscCall(MatCopy(a->A, b->A, str));
1659     PetscCall(MatCopy(a->B, b->B, str));
1660   }
1661   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1666 {
1667   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1668   PetscBLASInt  bnz, one                          = 1;
1669   Mat_SeqSBAIJ *xa, *ya;
1670   Mat_SeqBAIJ  *xb, *yb;
1671 
1672   PetscFunctionBegin;
1673   if (str == SAME_NONZERO_PATTERN) {
1674     PetscScalar alpha = a;
1675     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1676     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1677     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1678     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1679     xb = (Mat_SeqBAIJ *)xx->B->data;
1680     yb = (Mat_SeqBAIJ *)yy->B->data;
1681     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1682     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1683     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1684   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1685     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1686     PetscCall(MatAXPY_Basic(Y, a, X, str));
1687     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1688   } else {
1689     Mat       B;
1690     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1691     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1692     PetscCall(MatGetRowUpperTriangular(X));
1693     PetscCall(MatGetRowUpperTriangular(Y));
1694     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1695     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1696     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1697     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1698     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1699     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1700     PetscCall(MatSetType(B, MATMPISBAIJ));
1701     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1702     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1703     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1704     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1705     PetscCall(MatHeaderMerge(Y, &B));
1706     PetscCall(PetscFree(nnz_d));
1707     PetscCall(PetscFree(nnz_o));
1708     PetscCall(MatRestoreRowUpperTriangular(X));
1709     PetscCall(MatRestoreRowUpperTriangular(Y));
1710   }
1711   PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713 
1714 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1715 {
1716   PetscInt  i;
1717   PetscBool flg;
1718 
1719   PetscFunctionBegin;
1720   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1721   for (i = 0; i < n; i++) {
1722     PetscCall(ISEqual(irow[i], icol[i], &flg));
1723     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1724   }
1725   PetscFunctionReturn(PETSC_SUCCESS);
1726 }
1727 
1728 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1729 {
1730   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1731   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1732 
1733   PetscFunctionBegin;
1734   if (!Y->preallocated) {
1735     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1736   } else if (!aij->nz) {
1737     PetscInt nonew = aij->nonew;
1738     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1739     aij->nonew = nonew;
1740   }
1741   PetscCall(MatShift_Basic(Y, a));
1742   PetscFunctionReturn(PETSC_SUCCESS);
1743 }
1744 
1745 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1746 {
1747   PetscFunctionBegin;
1748   *a = ((Mat_MPISBAIJ *)A->data)->A;
1749   PetscFunctionReturn(PETSC_SUCCESS);
1750 }
1751 
1752 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1753 {
1754   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1755 
1756   PetscFunctionBegin;
1757   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1758   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1763 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1764 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1765 
1766 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1767                                        MatGetRow_MPISBAIJ,
1768                                        MatRestoreRow_MPISBAIJ,
1769                                        MatMult_MPISBAIJ,
1770                                        /*  4*/ MatMultAdd_MPISBAIJ,
1771                                        MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose ones */
1772                                        MatMultAdd_MPISBAIJ,
1773                                        NULL,
1774                                        NULL,
1775                                        NULL,
1776                                        /* 10*/ NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        MatSOR_MPISBAIJ,
1780                                        MatTranspose_MPISBAIJ,
1781                                        /* 15*/ MatGetInfo_MPISBAIJ,
1782                                        MatEqual_MPISBAIJ,
1783                                        MatGetDiagonal_MPISBAIJ,
1784                                        MatDiagonalScale_MPISBAIJ,
1785                                        MatNorm_MPISBAIJ,
1786                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1787                                        MatAssemblyEnd_MPISBAIJ,
1788                                        MatSetOption_MPISBAIJ,
1789                                        MatZeroEntries_MPISBAIJ,
1790                                        /* 24*/ NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        /* 29*/ MatSetUp_MPI_Hash,
1796                                        NULL,
1797                                        NULL,
1798                                        MatGetDiagonalBlock_MPISBAIJ,
1799                                        NULL,
1800                                        /* 34*/ MatDuplicate_MPISBAIJ,
1801                                        NULL,
1802                                        NULL,
1803                                        NULL,
1804                                        NULL,
1805                                        /* 39*/ MatAXPY_MPISBAIJ,
1806                                        MatCreateSubMatrices_MPISBAIJ,
1807                                        MatIncreaseOverlap_MPISBAIJ,
1808                                        MatGetValues_MPISBAIJ,
1809                                        MatCopy_MPISBAIJ,
1810                                        /* 44*/ NULL,
1811                                        MatScale_MPISBAIJ,
1812                                        MatShift_MPISBAIJ,
1813                                        NULL,
1814                                        NULL,
1815                                        /* 49*/ NULL,
1816                                        NULL,
1817                                        NULL,
1818                                        NULL,
1819                                        NULL,
1820                                        /* 54*/ NULL,
1821                                        NULL,
1822                                        MatSetUnfactored_MPISBAIJ,
1823                                        NULL,
1824                                        MatSetValuesBlocked_MPISBAIJ,
1825                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /* 64*/ NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        NULL,
1834                                        MatGetRowMaxAbs_MPISBAIJ,
1835                                        /* 69*/ NULL,
1836                                        MatConvert_MPISBAIJ_Basic,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        MatLoad_MPISBAIJ,
1845                                        /* 79*/ NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 84*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        /* 89*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        MatConjugate_MPISBAIJ,
1860                                        /* 94*/ NULL,
1861                                        NULL,
1862                                        MatRealPart_MPISBAIJ,
1863                                        MatImaginaryPart_MPISBAIJ,
1864                                        MatGetRowUpperTriangular_MPISBAIJ,
1865                                        /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        /*104*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        /*109*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        /*114*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        /*119*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        /*124*/ NULL,
1891                                        NULL,
1892                                        MatSetBlockSizes_Default,
1893                                        NULL,
1894                                        NULL,
1895                                        /*129*/ NULL,
1896                                        MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /*134*/ NULL,
1901                                        NULL,
1902                                        MatEliminateZeros_MPISBAIJ,
1903                                        NULL,
1904                                        NULL,
1905                                        /*139*/ NULL,
1906                                        NULL,
1907                                        MatCopyHashToXAIJ_MPI_Hash,
1908                                        NULL,
1909                                        NULL};
1910 
1911 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1912 {
1913   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1914   PetscInt      i, mbs, Mbs;
1915   PetscMPIInt   size;
1916 
1917   PetscFunctionBegin;
1918   if (B->hash_active) {
1919     B->ops[0]      = b->cops;
1920     B->hash_active = PETSC_FALSE;
1921   }
1922   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1923   PetscCall(MatSetBlockSize(B, bs));
1924   PetscCall(PetscLayoutSetUp(B->rmap));
1925   PetscCall(PetscLayoutSetUp(B->cmap));
1926   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1927   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1928   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1929 
1930   mbs = B->rmap->n / bs;
1931   Mbs = B->rmap->N / bs;
1932   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1933 
1934   B->rmap->bs = bs;
1935   b->bs2      = bs * bs;
1936   b->mbs      = mbs;
1937   b->Mbs      = Mbs;
1938   b->nbs      = B->cmap->n / bs;
1939   b->Nbs      = B->cmap->N / bs;
1940 
1941   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1942   b->rstartbs = B->rmap->rstart / bs;
1943   b->rendbs   = B->rmap->rend / bs;
1944 
1945   b->cstartbs = B->cmap->rstart / bs;
1946   b->cendbs   = B->cmap->rend / bs;
1947 
1948 #if defined(PETSC_USE_CTABLE)
1949   PetscCall(PetscHMapIDestroy(&b->colmap));
1950 #else
1951   PetscCall(PetscFree(b->colmap));
1952 #endif
1953   PetscCall(PetscFree(b->garray));
1954   PetscCall(VecDestroy(&b->lvec));
1955   PetscCall(VecScatterDestroy(&b->Mvctx));
1956   PetscCall(VecDestroy(&b->slvec0));
1957   PetscCall(VecDestroy(&b->slvec0b));
1958   PetscCall(VecDestroy(&b->slvec1));
1959   PetscCall(VecDestroy(&b->slvec1a));
1960   PetscCall(VecDestroy(&b->slvec1b));
1961   PetscCall(VecScatterDestroy(&b->sMvctx));
1962 
1963   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1964 
1965   MatSeqXAIJGetOptions_Private(b->B);
1966   PetscCall(MatDestroy(&b->B));
1967   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1968   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1969   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1970   MatSeqXAIJRestoreOptions_Private(b->B);
1971 
1972   MatSeqXAIJGetOptions_Private(b->A);
1973   PetscCall(MatDestroy(&b->A));
1974   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1975   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1976   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1977   MatSeqXAIJRestoreOptions_Private(b->A);
1978 
1979   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1980   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1981 
1982   B->preallocated  = PETSC_TRUE;
1983   B->was_assembled = PETSC_FALSE;
1984   B->assembled     = PETSC_FALSE;
1985   PetscFunctionReturn(PETSC_SUCCESS);
1986 }
1987 
1988 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1989 {
1990   PetscInt        m, rstart, cend;
1991   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1992   const PetscInt *JJ          = NULL;
1993   PetscScalar    *values      = NULL;
1994   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1995   PetscBool       nooffprocentries;
1996 
1997   PetscFunctionBegin;
1998   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1999   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2000   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2001   PetscCall(PetscLayoutSetUp(B->rmap));
2002   PetscCall(PetscLayoutSetUp(B->cmap));
2003   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2004   m      = B->rmap->n / bs;
2005   rstart = B->rmap->rstart / bs;
2006   cend   = B->cmap->rend / bs;
2007 
2008   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2009   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2010   for (i = 0; i < m; i++) {
2011     nz = ii[i + 1] - ii[i];
2012     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2013     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2014     JJ = jj + ii[i];
2015     bd = 0;
2016     for (j = 0; j < nz; j++) {
2017       if (*JJ >= i + rstart) break;
2018       JJ++;
2019       bd++;
2020     }
2021     d = 0;
2022     for (; j < nz; j++) {
2023       if (*JJ++ >= cend) break;
2024       d++;
2025     }
2026     d_nnz[i] = d;
2027     o_nnz[i] = nz - d - bd;
2028     nz       = nz - bd;
2029     nz_max   = PetscMax(nz_max, nz);
2030   }
2031   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2032   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2033   PetscCall(PetscFree2(d_nnz, o_nnz));
2034 
2035   values = (PetscScalar *)V;
2036   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2037   for (i = 0; i < m; i++) {
2038     PetscInt        row   = i + rstart;
2039     PetscInt        ncols = ii[i + 1] - ii[i];
2040     const PetscInt *icols = jj + ii[i];
2041     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2042       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2043       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2044     } else { /* block ordering does not match so we can only insert one block at a time. */
2045       PetscInt j;
2046       for (j = 0; j < ncols; j++) {
2047         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2048         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2049       }
2050     }
2051   }
2052 
2053   if (!V) PetscCall(PetscFree(values));
2054   nooffprocentries    = B->nooffprocentries;
2055   B->nooffprocentries = PETSC_TRUE;
2056   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2057   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2058   B->nooffprocentries = nooffprocentries;
2059 
2060   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063 
2064 /*MC
2065    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2066    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2067    the matrix is stored.
2068 
2069    For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2070    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2071 
2072    Options Database Key:
2073 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2074 
2075    Level: beginner
2076 
2077    Note:
2078      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2079      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2080 
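   Example Usage:
   A minimal sketch; the global size 100, block size 2, and per-block-row nonzero estimates below are illustrative only.
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, 2, 3, NULL, 1, NULL);
   MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE); /* complex case: store a Hermitian rather than a symmetric matrix */
.ve
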
2081 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2082 M*/
2083 
2084 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2085 {
2086   Mat_MPISBAIJ *b;
2087   PetscBool     flg = PETSC_FALSE;
2088 
2089   PetscFunctionBegin;
2090   PetscCall(PetscNew(&b));
2091   B->data   = (void *)b;
2092   B->ops[0] = MatOps_Values;
2093 
2094   B->ops->destroy = MatDestroy_MPISBAIJ;
2095   B->ops->view    = MatView_MPISBAIJ;
2096   B->assembled    = PETSC_FALSE;
2097   B->insertmode   = NOT_SET_VALUES;
2098 
2099   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2100   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2101 
2102   /* build local table of row and column ownerships */
2103   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2104 
2105   /* build cache for off array entries formed */
2106   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2107 
2108   b->donotstash  = PETSC_FALSE;
2109   b->colmap      = NULL;
2110   b->garray      = NULL;
2111   b->roworiented = PETSC_TRUE;
2112 
2113   /* stuff used in block assembly */
2114   b->barray = NULL;
2115 
2116   /* stuff used for matrix vector multiply */
2117   b->lvec    = NULL;
2118   b->Mvctx   = NULL;
2119   b->slvec0  = NULL;
2120   b->slvec0b = NULL;
2121   b->slvec1  = NULL;
2122   b->slvec1a = NULL;
2123   b->slvec1b = NULL;
2124   b->sMvctx  = NULL;
2125 
2126   /* stuff for MatGetRow() */
2127   b->rowindices   = NULL;
2128   b->rowvalues    = NULL;
2129   b->getrowactive = PETSC_FALSE;
2130 
2131   /* hash table stuff */
2132   b->ht           = NULL;
2133   b->hd           = NULL;
2134   b->ht_size      = 0;
2135   b->ht_flag      = PETSC_FALSE;
2136   b->ht_fact      = 0;
2137   b->ht_total_ct  = 0;
2138   b->ht_insert_ct = 0;
2139 
2140   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2141   b->ijonly = PETSC_FALSE;
2142 
2143   b->in_loc = NULL;
2144   b->v_loc  = NULL;
2145   b->n_loc  = 0;
2146 
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2149   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2150   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2151 #if defined(PETSC_HAVE_ELEMENTAL)
2152   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2153 #endif
2154 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
2155   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2156 #endif
2157   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2158   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2159 
2160   B->symmetric                   = PETSC_BOOL3_TRUE;
2161   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2162   B->symmetry_eternal            = PETSC_TRUE;
2163   B->structural_symmetry_eternal = PETSC_TRUE;
2164 #if defined(PETSC_USE_COMPLEX)
2165   B->hermitian = PETSC_BOOL3_FALSE;
2166 #else
2167   B->hermitian = PETSC_BOOL3_TRUE;
2168 #endif
2169 
2170   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2171   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2172   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2173   if (flg) {
2174     PetscReal fact = 1.39;
2175     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2176     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2177     if (fact <= 1.0) fact = 1.39;
2178     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2179     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2180   }
2181   PetscOptionsEnd();
2182   PetscFunctionReturn(PETSC_SUCCESS);
2183 }
2184 
2185 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2186 /*MC
2187    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2188 
2189    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2190    and `MATMPISBAIJ` otherwise.
2191 
2192    Options Database Key:
2193 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2194 
2195   Level: beginner
2196 
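   Example Usage:
   A minimal sketch of selecting the type at runtime; the global size 100 is illustrative only.
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
   MatSetFromOptions(A); /* becomes MATSEQSBAIJ or MATMPISBAIJ when run with -mat_type sbaij */
.ve
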
2197 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2198 M*/
2199 
2200 /*@
2201   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2202   the user should preallocate the matrix storage by setting the parameters
2203   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2204   performance can be increased by more than a factor of 50.
2205 
2206   Collective
2207 
2208   Input Parameters:
2209 + B     - the matrix
2210 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2211           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2212 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2213           submatrix  (same for all local rows)
2214 . d_nnz - array containing the number of block nonzeros in the various block rows
2215           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2216           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2217           for the diagonal entry and set a value even if it is zero.
2218 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2219           submatrix (same for all local rows).
2220 - o_nnz - array containing the number of nonzeros in the various block rows of the
2221           off-diagonal portion of the local submatrix that is to the right of the diagonal
2222           (possibly different for each block row) or `NULL`.
2223 
2224   Options Database Keys:
2225 + -mat_no_unroll  - uses code that does not unroll the loops in the
2226                     block calculations (much slower)
2227 - -mat_block_size - size of the blocks to use
2228 
2229   Level: intermediate
2230 
2231   Notes:
2232 
2233   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2234   then it must be used on all processors that share the object for that argument.
2235 
2236   If the *_nnz parameter is given, then the *_nz parameter is ignored.
2237 
2238   Storage Information:
2239   For a square global matrix we define each processor's diagonal portion
2240   to be its local rows and the corresponding columns (a square submatrix);
2241   each processor's off-diagonal portion encompasses the remainder of the
2242   local matrix (a rectangular submatrix).
2243 
2244   The user can specify preallocated storage for the diagonal part of
2245   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2246   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2247   memory allocation.  Likewise, specify preallocated storage for the
2248   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2249 
2250   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2251   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2252   You can also run with the option `-info` and look for messages with the string
2253   malloc in them to see if additional memory allocation was needed.
2254 
2255   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2256   the figure below we depict these three local rows and all columns (0-11).
2257 
2258 .vb
2259            0 1 2 3 4 5 6 7 8 9 10 11
2260           --------------------------
2261    row 3  |. . . d d d o o o o  o  o
2262    row 4  |. . . d d d o o o o  o  o
2263    row 5  |. . . d d d o o o o  o  o
2264           --------------------------
2265 .ve
2266 
2267   Thus, any entries in the d locations are stored in the d (diagonal)
2268   submatrix, and any entries in the o locations are stored in the
2269   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2270   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2271 
2272   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2273   plus the diagonal part of the d matrix,
2274   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2275 
2276   In general, for PDE problems in which most nonzeros are near the diagonal,
2277   one expects `d_nz` >> `o_nz`.
2278 
2279 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
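  Example Usage:
  A minimal sketch for the process depicted in the figure above, with bs = 1 and three local rows; the uniform counts
  d_nz = 3 and o_nz = 6 are illustrative upper bounds over those rows.
.vb
  Mat A;
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(A, MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(A, 1, 3, NULL, 6, NULL); /* at most 3 entries per row in the upper triangle of d, 6 in o */
.ve
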
2280 @*/
2281 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2282 {
2283   PetscFunctionBegin;
2284   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2285   PetscValidType(B, 1);
2286   PetscValidLogicalCollectiveInt(B, bs, 2);
2287   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2288   PetscFunctionReturn(PETSC_SUCCESS);
2289 }
2290 
2291 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2292 /*@
2293   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2294   (block compressed row).  For good matrix assembly performance
2295   the user should preallocate the matrix storage by setting the parameters
2296   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2297 
2298   Collective
2299 
2300   Input Parameters:
2301 + comm  - MPI communicator
2302 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2303           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2304 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
2305           This value should be the same as the local size used in creating the
2306           y vector for the matrix-vector product y = Ax.
2307 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
2308           This value should be the same as the local size used in creating the
2309           x vector for the matrix-vector product y = Ax.
2310 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2311 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2312 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2313           submatrix (same for all local rows)
2314 . d_nnz - array containing the number of block nonzeros in the various block rows
2315           in the upper triangular portion of the diagonal portion of the local submatrix
2316           (possibly different for each block row) or `NULL`.
2317           If you plan to factor the matrix you must leave room for the diagonal entry and
2318           set its value even if it is zero.
2319 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2320           submatrix (same for all local rows).
2321 - o_nnz - array containing the number of nonzeros in the various block rows of the
2322           off-diagonal portion of the local submatrix (possibly different for
2323           each block row) or `NULL`.
2324 
2325   Output Parameter:
2326 . A - the matrix
2327 
2328   Options Database Keys:
2329 + -mat_no_unroll  - uses code that does not unroll the loops in the
2330                     block calculations (much slower)
2331 . -mat_block_size - size of the blocks to use
2332 - -mat_mpi        - use the parallel matrix data structures even on one processor
2333                     (defaults to using SeqBAIJ format on one processor)
2334 
2335   Level: intermediate
2336 
2337   Notes:
2338   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2339   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2340   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2341 
2342   The number of rows and columns must be divisible by blocksize.
2343   This matrix type does not support complex Hermitian operations.
2344 
2345   The user MUST specify either the local or global matrix dimensions
2346   (possibly both).
2347 
2348   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2349   then it must be used on all processors that share the object for that argument.
2350 
2351   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
2352   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2353 
2354   If the *_nnz parameter is given, then the *_nz parameter is ignored.
2355 
2356   Storage Information:
2357   For a square global matrix we define each processor's diagonal portion
2358   to be its local rows and the corresponding columns (a square submatrix);
2359   each processor's off-diagonal portion encompasses the remainder of the
2360   local matrix (a rectangular submatrix).
2361 
2362   The user can specify preallocated storage for the diagonal part of
2363   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2364   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2365   memory allocation. Likewise, specify preallocated storage for the
2366   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2367 
2368   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2369   the figure below we depict these three local rows and all columns (0-11).
2370 
2371 .vb
2372            0 1 2 3 4 5 6 7 8 9 10 11
2373           --------------------------
2374    row 3  |. . . d d d o o o o  o  o
2375    row 4  |. . . d d d o o o o  o  o
2376    row 5  |. . . d d d o o o o  o  o
2377           --------------------------
2378 .ve
2379 
2380   Thus, any entries in the d locations are stored in the d (diagonal)
2381   submatrix, and any entries in the o locations are stored in the
2382   o (off-diagonal) submatrix. Note that the d matrix is stored in
2383   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2384 
2385   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2386   plus the diagonal part of the d matrix,
2387   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2388   In general, for PDE problems in which most nonzeros are near the diagonal,
2389   one expects `d_nz` >> `o_nz`.
2390 
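  Example Usage:
  A one-call sketch equivalent to the `MatCreate()`/`MatSetType()`/`MatMPISBAIJSetPreallocation()` sequence; block size 2
  and the block-nonzero estimates are illustrative only.
.vb
  Mat A;
  MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 3, NULL, 1, NULL, &A);
.ve
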
2391 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2392           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2393 @*/
2394 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2395 {
2396   PetscMPIInt size;
2397 
2398   PetscFunctionBegin;
2399   PetscCall(MatCreate(comm, A));
2400   PetscCall(MatSetSizes(*A, m, n, M, N));
2401   PetscCallMPI(MPI_Comm_size(comm, &size));
2402   if (size > 1) {
2403     PetscCall(MatSetType(*A, MATMPISBAIJ));
2404     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2405   } else {
2406     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2407     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2408   }
2409   PetscFunctionReturn(PETSC_SUCCESS);
2410 }
2411 
2412 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2413 {
2414   Mat           mat;
2415   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2416   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2417   PetscScalar  *array;
2418 
2419   PetscFunctionBegin;
2420   *newmat = NULL;
2421 
2422   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2423   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2424   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2425   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2426   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2427 
2428   if (matin->hash_active) {
2429     PetscCall(MatSetUp(mat));
2430   } else {
2431     mat->factortype   = matin->factortype;
2432     mat->preallocated = PETSC_TRUE;
2433     mat->assembled    = PETSC_TRUE;
2434     mat->insertmode   = NOT_SET_VALUES;
2435 
2436     a      = (Mat_MPISBAIJ *)mat->data;
2437     a->bs2 = oldmat->bs2;
2438     a->mbs = oldmat->mbs;
2439     a->nbs = oldmat->nbs;
2440     a->Mbs = oldmat->Mbs;
2441     a->Nbs = oldmat->Nbs;
2442 
2443     a->size         = oldmat->size;
2444     a->rank         = oldmat->rank;
2445     a->donotstash   = oldmat->donotstash;
2446     a->roworiented  = oldmat->roworiented;
2447     a->rowindices   = NULL;
2448     a->rowvalues    = NULL;
2449     a->getrowactive = PETSC_FALSE;
2450     a->barray       = NULL;
2451     a->rstartbs     = oldmat->rstartbs;
2452     a->rendbs       = oldmat->rendbs;
2453     a->cstartbs     = oldmat->cstartbs;
2454     a->cendbs       = oldmat->cendbs;
2455 
2456     /* hash table stuff */
2457     a->ht           = NULL;
2458     a->hd           = NULL;
2459     a->ht_size      = 0;
2460     a->ht_flag      = oldmat->ht_flag;
2461     a->ht_fact      = oldmat->ht_fact;
2462     a->ht_total_ct  = 0;
2463     a->ht_insert_ct = 0;
2464 
2465     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2466     if (oldmat->colmap) {
2467 #if defined(PETSC_USE_CTABLE)
2468       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2469 #else
2470       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2471       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2472 #endif
2473     } else a->colmap = NULL;
2474 
2475     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2476       PetscCall(PetscMalloc1(len, &a->garray));
2477       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2478     } else a->garray = NULL;
2479 
2480     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2481     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2482     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2483 
2484     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2485     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2486 
2487     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2488     PetscCall(VecGetArray(a->slvec1, &array));
2489     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2490     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2491     PetscCall(VecRestoreArray(a->slvec1, &array));
2492     PetscCall(VecGetArray(a->slvec0, &array));
2493     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2494     PetscCall(VecRestoreArray(a->slvec0, &array));
2495 
2496     /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead, share the old scatter by taking a reference: */
2497     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2498     a->sMvctx = oldmat->sMvctx;
2499 
2500     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2501     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2502   }
2503   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2504   *newmat = mat;
2505   PetscFunctionReturn(PETSC_SUCCESS);
2506 }
2507 
2508 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2509 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2510 
2511 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2512 {
2513   PetscBool isbinary;
2514 
2515   PetscFunctionBegin;
2516   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2517   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2518   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2519   PetscFunctionReturn(PETSC_SUCCESS);
2520 }
2521 
2522 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2523 {
2524   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2525   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2526   PetscReal     atmp;
2527   PetscReal    *work, *svalues, *rvalues;
2528   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2529   PetscMPIInt   rank, size;
2530   PetscInt     *rowners_bs, count, source;
2531   PetscScalar  *va;
2532   MatScalar    *ba;
2533   MPI_Status    stat;
2534 
2535   PetscFunctionBegin;
2536   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning row indices (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
2537   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2538   PetscCall(VecGetArray(v, &va));
2539 
2540   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2541   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2542 
2543   bs  = A->rmap->bs;
2544   mbs = a->mbs;
2545   Mbs = a->Mbs;
2546   ba  = b->a;
2547   bi  = b->i;
2548   bj  = b->j;
2549 
2550   /* find ownerships */
2551   rowners_bs = A->rmap->range;
2552 
2553   /* each proc creates an array to be distributed */
2554   PetscCall(PetscCalloc1(bs * Mbs, &work));
2555 
2556   /* row_max for B */
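  /* since only the upper triangle is stored, each block of B contributes to a locally
     owned row maximum and to a column maximum owned by a higher rank; the column
     contributions are accumulated in work[] and shipped to their owners below */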
2557   if (rank != size - 1) {
2558     for (i = 0; i < mbs; i++) {
2559       ncols = bi[1] - bi[0];
2560       bi++;
2561       brow = bs * i;
2562       for (j = 0; j < ncols; j++) {
2563         bcol = bs * (*bj);
2564         for (kcol = 0; kcol < bs; kcol++) {
2565           col = bcol + kcol;           /* local col index */
2566           col += rowners_bs[rank + 1]; /* global col index */
2567           for (krow = 0; krow < bs; krow++) {
2568             atmp = PetscAbsScalar(*ba);
2569             ba++;
2570             row = brow + krow; /* local row index */
2571             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2572             if (work[col] < atmp) work[col] = atmp;
2573           }
2574         }
2575         bj++;
2576       }
2577     }
2578 
2579     /* send values to its owners */
2580     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2581       svalues = work + rowners_bs[dest];
2582       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2583       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2584     }
2585   }
2586 
2587   /* receive values */
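  /* each lower rank may have sent column maxima that belong to this rank's rows;
     fold every incoming buffer into va[] by taking the pointwise maximum */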
2588   if (rank) {
2589     rvalues = work;
2590     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2591     for (source = 0; source < rank; source++) {
2592       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2593       /* process values */
2594       for (i = 0; i < count; i++) {
2595         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2596       }
2597     }
2598   }
2599 
2600   PetscCall(VecRestoreArray(v, &va));
2601   PetscCall(PetscFree(work));
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2606 {
2607   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2608   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2609   PetscScalar       *x, *ptr, *from;
2610   Vec                bb1;
2611   const PetscScalar *b;
2612 
2613   PetscFunctionBegin;
2614   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2615   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2616 
2617   if (flag == SOR_APPLY_UPPER) {
2618     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2619     PetscFunctionReturn(PETSC_SUCCESS);
2620   }
2621 
2622   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2623     if (flag & SOR_ZERO_INITIAL_GUESS) {
2624       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2625       its--;
2626     }
2627 
2628     PetscCall(VecDuplicate(bb, &bb1));
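    /* each remaining iteration assembles bb1 = bb - (off-diagonal part of A)*xx through the
       shared slvec0/slvec1 scatter, then applies a symmetric sweep with the diagonal block */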
2629     while (its--) {
2630       /* lower triangular part: slvec0b = - B^T*xx */
2631       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2632 
2633       /* copy xx into slvec0a */
2634       PetscCall(VecGetArray(mat->slvec0, &ptr));
2635       PetscCall(VecGetArray(xx, &x));
2636       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2637       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2638 
2639       PetscCall(VecScale(mat->slvec0, -1.0));
2640 
2641       /* copy bb into slvec1a */
2642       PetscCall(VecGetArray(mat->slvec1, &ptr));
2643       PetscCall(VecGetArrayRead(bb, &b));
2644       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2645       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2646 
2647       /* set slvec1b = 0 */
2648       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2649       PetscCall(VecZeroEntries(mat->slvec1b));
2650 
2651       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2652       PetscCall(VecRestoreArray(xx, &x));
2653       PetscCall(VecRestoreArrayRead(bb, &b));
2654       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2655 
2656       /* upper triangular part: bb1 = bb1 - B*x */
2657       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2658 
2659       /* local diagonal sweep */
2660       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2661     }
2662     PetscCall(VecDestroy(&bb1));
2663   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2664     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2665   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2666     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2667   } else if (flag & SOR_EISENSTAT) {
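    /* Eisenstat's trick: a backward sweep, the diagonal combination b + scale*D*x with
       scale = (omega-2)/omega, an off-diagonal multiply, and a forward sweep together
       apply the SSOR-preconditioned operator more cheaply than forming it directly */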
2668     Vec                xx1;
2669     PetscBool          hasop;
2670     const PetscScalar *diag;
2671     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2672     PetscInt           i, n;
2673 
2674     if (!mat->xx1) {
2675       PetscCall(VecDuplicate(bb, &mat->xx1));
2676       PetscCall(VecDuplicate(bb, &mat->bb1));
2677     }
2678     xx1 = mat->xx1;
2679     bb1 = mat->bb1;
2680 
2681     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2682 
2683     if (!mat->diag) {
2684       /* this is wrong for the same matrix with new nonzero values, since the cached diagonal is not recomputed */
2685       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2686       PetscCall(MatGetDiagonal(matin, mat->diag));
2687     }
2688     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2689 
2690     if (hasop) {
2691       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2692       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2693     } else {
2694       /*
2695           These two lines are replaced by code that may be a bit faster for a good compiler
2696       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2697       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2698       */
2699       PetscCall(VecGetArray(mat->slvec1a, &sl));
2700       PetscCall(VecGetArrayRead(mat->diag, &diag));
2701       PetscCall(VecGetArrayRead(bb, &b));
2702       PetscCall(VecGetArray(xx, &x));
2703       PetscCall(VecGetLocalSize(xx, &n));
2704       if (omega == 1.0) {
2705         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2706         PetscCall(PetscLogFlops(2.0 * n));
2707       } else {
2708         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2709         PetscCall(PetscLogFlops(3.0 * n));
2710       }
2711       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2712       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2713       PetscCall(VecRestoreArrayRead(bb, &b));
2714       PetscCall(VecRestoreArray(xx, &x));
2715     }
2716 
2717     /* multiply off-diagonal portion of matrix */
2718     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2719     PetscCall(VecZeroEntries(mat->slvec1b));
2720     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2721     PetscCall(VecGetArray(mat->slvec0, &from));
2722     PetscCall(VecGetArray(xx, &x));
2723     PetscCall(PetscArraycpy(from, x, bs * mbs));
2724     PetscCall(VecRestoreArray(mat->slvec0, &from));
2725     PetscCall(VecRestoreArray(xx, &x));
2726     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2727     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2728     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2729 
2730     /* local sweep */
2731     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2732     PetscCall(VecAXPY(xx, 1.0, xx1));
2733   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2734   PetscFunctionReturn(PETSC_SUCCESS);
2735 }
2736 
2737 /*@
2738   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2739 
2740   Collective
2741 
2742   Input Parameters:
2743 + comm - MPI communicator
2744 . bs   - the block size, only a block size of 1 is supported
2745 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2746 . n    - This value should be the same as the local size used in creating the
2747          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
2748          calculated if `N` is given). For square matrices `n` is almost always `m`.
2749 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2750 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2751 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2752 . j    - column indices
2753 - a    - matrix values
2754 
2755   Output Parameter:
2756 . mat - the matrix
2757 
2758   Level: intermediate
2759 
2760   Notes:
2761   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2762   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2763   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2764 
2765   The `i` and `j` indices are 0 based, and the `i` array holds offsets into the local `j` array.
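
  For example, a 4 x 4 symmetric tridiagonal matrix with 2 on the diagonal and 1 on the off-diagonals, split as two rows
  per process on two MPI processes, could be created with the sketch below (illustrative values only; each process passes
  just the upper triangle of its rows):
.vb
  Mat         A;
  PetscMPIInt rank;
  PetscInt    i0[] = {0, 2, 4}, j0[] = {0, 1, 1, 2}; // process 0 owns rows 0 and 1
  PetscScalar a0[] = {2, 1, 2, 1};
  PetscInt    i1[] = {0, 2, 3}, j1[] = {2, 3, 3};    // process 1 owns rows 2 and 3
  PetscScalar a1[] = {2, 1, 2};

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  if (rank == 0) PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i0, j0, a0, &A));
  else PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i1, j1, a1, &A));
.ve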
2766 
2767 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2768           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2769 @*/
2770 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2771 {
2772   PetscFunctionBegin;
2773   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2774   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2775   PetscCall(MatCreate(comm, mat));
2776   PetscCall(MatSetSizes(*mat, m, n, M, N));
2777   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2778   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2779   PetscFunctionReturn(PETSC_SUCCESS);
2780 }
2781 
2782 /*@
2783   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2784 
2785   Collective
2786 
2787   Input Parameters:
2788 + B  - the matrix
2789 . bs - the block size
2790 . i  - the indices into `j` for the start of each local row (indices start with zero)
2791 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2792 - v  - optional values in the matrix, pass `NULL` if not provided
2793 
2794   Level: advanced
2795 
2796   Notes:
2797   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2798   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2799   called this routine.
2800 
2801   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2802   and usually the numerical values as well.
2803 
2804   Any entries passed in that are below the diagonal are ignored.
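
  A minimal sketch (assuming bs = 1 and a single MPI process owning both rows; values are illustrative only):
.vb
  Mat         B;
  PetscInt    i[] = {0, 2, 3}, j[] = {0, 1, 1}; // upper triangle of [[4 1],[1 3]]
  PetscScalar v[] = {4.0, 1.0, 3.0};

  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve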
2805 
2806 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2807           `MatCreateMPISBAIJWithArrays()`
2808 @*/
2809 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2810 {
2811   PetscFunctionBegin;
2812   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2813   PetscFunctionReturn(PETSC_SUCCESS);
2814 }
2815 
2816 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2817 {
2818   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2819   PetscInt    *indx;
2820   PetscScalar *values;
2821 
2822   PetscFunctionBegin;
2823   PetscCall(MatGetSize(inmat, &m, &N));
2824   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2825     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2826     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2827     PetscInt     *bindx, rmax = a->rmax, j;
2828     PetscMPIInt   rank, size;
2829 
2830     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2831     mbs = m / bs;
2832     Nbs = N / cbs;
2833     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2834     nbs = n / cbs;
2835 
2836     PetscCall(PetscMalloc1(rmax, &bindx));
2837     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2838 
2839     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2840     PetscCallMPI(MPI_Comm_size(comm, &size));
2841     if (rank == size - 1) {
2842       /* Check sum(nbs) = Nbs */
2843       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2844     }
2845 
2846     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2847     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
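    /* MatGetRow_SeqSBAIJ() returns point (non-blocked) indices, so sample every bs-th column
       index and divide by bs to recover the block columns used for preallocation */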
2848     for (i = 0; i < mbs; i++) {
2849       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2850       nnz = nnz / bs;
2851       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2852       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2853       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2854     }
2855     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2856     PetscCall(PetscFree(bindx));
2857 
2858     PetscCall(MatCreate(comm, outmat));
2859     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2860     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2861     PetscCall(MatSetType(*outmat, MATSBAIJ));
2862     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2863     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2864     MatPreallocateEnd(dnz, onz);
2865   }
2866 
2867   /* numeric phase */
2868   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2869   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2870 
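  /* copy each local row of inmat into *outmat at global point row i + rstart;
     GetRow/RestoreRow return point (non-blocked) indices and values */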
2871   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2872   for (i = 0; i < m; i++) {
2873     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2874     Ii = i + rstart;
2875     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2876     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2877   }
2878   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2879   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2880   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883