xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6cff0a6b28b63cecf061139fc1b2425d23f67b96)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237     A->nonzerostate++; \
238   a_noinsert:; \
239     ailen[brow] = nrow; \
240   } while (0)
241 
242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
243   do { \
244     brow = row / bs; \
245     rp   = bj + bi[brow]; \
246     ap   = ba + bs2 * bi[brow]; \
247     rmax = bimax[brow]; \
248     nrow = bilen[brow]; \
249     bcol = col / bs; \
250     ridx = row % bs; \
251     cidx = col % bs; \
252     low  = 0; \
253     high = nrow; \
254     while (high - low > 3) { \
255       t = (low + high) / 2; \
256       if (rp[t] > bcol) high = t; \
257       else low = t; \
258     } \
259     for (_i = low; _i < high; _i++) { \
260       if (rp[_i] > bcol) break; \
261       if (rp[_i] == bcol) { \
262         bap = ap + bs2 * _i + bs * cidx + ridx; \
263         if (addv == ADD_VALUES) *bap += value; \
264         else *bap = value; \
265         goto b_noinsert; \
266       } \
267     } \
268     if (b->nonew == 1) goto b_noinsert; \
269     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
270     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
271     N = nrow++ - 1; \
272     /* shift up all the later entries in this row */ \
273     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
274     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
275     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
276     rp[_i]                          = bcol; \
277     ap[bs2 * _i + bs * cidx + ridx] = value; \
278     B->nonzerostate++; \
279   b_noinsert:; \
280     bilen[brow] = nrow; \
281   } while (0)
282 
283 /* Only add/insert a(i,j) with i<=j (blocks).
284    Any a(i,j) with i>j input by user is ignored or generates an error
285 */
286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
287 {
288   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
289   MatScalar     value;
290   PetscBool     roworiented = baij->roworiented;
291   PetscInt      i, j, row, col;
292   PetscInt      rstart_orig = mat->rmap->rstart;
293   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
294   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
295 
296   /* Some Variables required in the macro */
297   Mat           A     = baij->A;
298   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
299   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
300   MatScalar    *aa = a->a;
301 
302   Mat          B     = baij->B;
303   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
304   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
305   MatScalar   *ba = b->a;
306 
307   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
308   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
309   MatScalar *ap, *bap;
310 
311   /* for stash */
312   PetscInt   n_loc, *in_loc = NULL;
313   MatScalar *v_loc = NULL;
314 
315   PetscFunctionBegin;
316   if (!baij->donotstash) {
317     if (n > baij->n_loc) {
318       PetscCall(PetscFree(baij->in_loc));
319       PetscCall(PetscFree(baij->v_loc));
320       PetscCall(PetscMalloc1(n, &baij->in_loc));
321       PetscCall(PetscMalloc1(n, &baij->v_loc));
322 
323       baij->n_loc = n;
324     }
325     in_loc = baij->in_loc;
326     v_loc  = baij->v_loc;
327   }
328 
329   for (i = 0; i < m; i++) {
330     if (im[i] < 0) continue;
331     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
332     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
333       row = im[i] - rstart_orig;                     /* local row index */
334       for (j = 0; j < n; j++) {
335         if (im[i] / bs > in[j] / bs) {
336           if (a->ignore_ltriangular) {
337             continue; /* ignore lower triangular blocks */
338           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
339         }
340         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
341           col  = in[j] - cstart_orig;                    /* local col index */
342           brow = row / bs;
343           bcol = col / bs;
344           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
345           if (roworiented) value = v[i * n + j];
346           else value = v[i + j * m];
347           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
348           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
349         } else if (in[j] < 0) {
350           continue;
351         } else {
352           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
353           /* off-diag entry (B) */
354           if (mat->was_assembled) {
355             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
356 #if defined(PETSC_USE_CTABLE)
357             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
358             col = col - 1;
359 #else
360             col = baij->colmap[in[j] / bs] - 1;
361 #endif
362             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
363               PetscCall(MatDisAssemble_MPISBAIJ(mat));
364               col = in[j];
365               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
366               B     = baij->B;
367               b     = (Mat_SeqBAIJ *)(B)->data;
368               bimax = b->imax;
369               bi    = b->i;
370               bilen = b->ilen;
371               bj    = b->j;
372               ba    = b->a;
373             } else col += in[j] % bs;
374           } else col = in[j];
375           if (roworiented) value = v[i * n + j];
376           else value = v[i + j * m];
377           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
378           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
379         }
380       }
381     } else { /* off processor entry */
382       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
383       if (!baij->donotstash) {
384         mat->assembled = PETSC_FALSE;
385         n_loc          = 0;
386         for (j = 0; j < n; j++) {
387           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
388           in_loc[n_loc] = in[j];
389           if (roworiented) {
390             v_loc[n_loc] = v[i * n + j];
391           } else {
392             v_loc[n_loc] = v[j * m + i];
393           }
394           n_loc++;
395         }
396         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
404 {
405   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
406   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
407   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
408   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
409   PetscBool          roworiented = a->roworiented;
410   const PetscScalar *value       = v;
411   MatScalar         *ap, *aa = a->a, *bap;
412 
413   PetscFunctionBegin;
414   if (col < row) {
415     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
416     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
417   }
418   rp    = aj + ai[row];
419   ap    = aa + bs2 * ai[row];
420   rmax  = imax[row];
421   nrow  = ailen[row];
422   value = v;
423   low   = 0;
424   high  = nrow;
425 
426   while (high - low > 7) {
427     t = (low + high) / 2;
428     if (rp[t] > col) high = t;
429     else low = t;
430   }
431   for (i = low; i < high; i++) {
432     if (rp[i] > col) break;
433     if (rp[i] == col) {
434       bap = ap + bs2 * i;
435       if (roworiented) {
436         if (is == ADD_VALUES) {
437           for (ii = 0; ii < bs; ii++) {
438             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
439           }
440         } else {
441           for (ii = 0; ii < bs; ii++) {
442             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
443           }
444         }
445       } else {
446         if (is == ADD_VALUES) {
447           for (ii = 0; ii < bs; ii++) {
448             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
449           }
450         } else {
451           for (ii = 0; ii < bs; ii++) {
452             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
453           }
454         }
455       }
456       goto noinsert2;
457     }
458   }
459   if (nonew == 1) goto noinsert2;
460   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
461   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
462   N = nrow++ - 1;
463   high++;
464   /* shift up all the later entries in this row */
465   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
466   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
467   rp[i] = col;
468   bap   = ap + bs2 * i;
469   if (roworiented) {
470     for (ii = 0; ii < bs; ii++) {
471       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
472     }
473   } else {
474     for (ii = 0; ii < bs; ii++) {
475       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
476     }
477   }
478 noinsert2:;
479   ailen[row] = nrow;
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 
483 /*
484    This routine is exactly duplicated in mpibaij.c
485 */
486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
487 {
488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
489   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
490   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
491   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
492   PetscBool          roworiented = a->roworiented;
493   const PetscScalar *value       = v;
494   MatScalar         *ap, *aa = a->a, *bap;
495 
496   PetscFunctionBegin;
497   rp    = aj + ai[row];
498   ap    = aa + bs2 * ai[row];
499   rmax  = imax[row];
500   nrow  = ailen[row];
501   low   = 0;
502   high  = nrow;
503   value = v;
504   while (high - low > 7) {
505     t = (low + high) / 2;
506     if (rp[t] > col) high = t;
507     else low = t;
508   }
509   for (i = low; i < high; i++) {
510     if (rp[i] > col) break;
511     if (rp[i] == col) {
512       bap = ap + bs2 * i;
513       if (roworiented) {
514         if (is == ADD_VALUES) {
515           for (ii = 0; ii < bs; ii++) {
516             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
517           }
518         } else {
519           for (ii = 0; ii < bs; ii++) {
520             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
521           }
522         }
523       } else {
524         if (is == ADD_VALUES) {
525           for (ii = 0; ii < bs; ii++, value += bs) {
526             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
527             bap += bs;
528           }
529         } else {
530           for (ii = 0; ii < bs; ii++, value += bs) {
531             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
532             bap += bs;
533           }
534         }
535       }
536       goto noinsert2;
537     }
538   }
539   if (nonew == 1) goto noinsert2;
540   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
541   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
542   N = nrow++ - 1;
543   high++;
544   /* shift up all the later entries in this row */
545   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
546   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
547   rp[i] = col;
548   bap   = ap + bs2 * i;
549   if (roworiented) {
550     for (ii = 0; ii < bs; ii++) {
551       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
552     }
553   } else {
554     for (ii = 0; ii < bs; ii++) {
555       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
556     }
557   }
558 noinsert2:;
559   ailen[row] = nrow;
560   PetscFunctionReturn(PETSC_SUCCESS);
561 }
562 
563 /*
564     This routine could be optimized by removing the need for the block copy below and passing stride information
565   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
566 */
567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
568 {
569   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
570   const MatScalar *value;
571   MatScalar       *barray      = baij->barray;
572   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
573   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
574   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
575   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
576 
577   PetscFunctionBegin;
578   if (!barray) {
579     PetscCall(PetscMalloc1(bs2, &barray));
580     baij->barray = barray;
581   }
582 
583   if (roworiented) {
584     stepval = (n - 1) * bs;
585   } else {
586     stepval = (m - 1) * bs;
587   }
588   for (i = 0; i < m; i++) {
589     if (im[i] < 0) continue;
590     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
591     if (im[i] >= rstart && im[i] < rend) {
592       row = im[i] - rstart;
593       for (j = 0; j < n; j++) {
594         if (im[i] > in[j]) {
595           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
596           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
597         }
598         /* If NumCol = 1 then a copy is not required */
599         if ((roworiented) && (n == 1)) {
600           barray = (MatScalar *)v + i * bs2;
601         } else if ((!roworiented) && (m == 1)) {
602           barray = (MatScalar *)v + j * bs2;
603         } else { /* Here a copy is required */
604           if (roworiented) {
605             value = v + i * (stepval + bs) * bs + j * bs;
606           } else {
607             value = v + j * (stepval + bs) * bs + i * bs;
608           }
609           for (ii = 0; ii < bs; ii++, value += stepval) {
610             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
611           }
612           barray -= bs2;
613         }
614 
615         if (in[j] >= cstart && in[j] < cend) {
616           col = in[j] - cstart;
617           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
618         } else if (in[j] < 0) {
619           continue;
620         } else {
621           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
622           if (mat->was_assembled) {
623             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624 
625 #if defined(PETSC_USE_DEBUG)
626   #if defined(PETSC_USE_CTABLE)
627             {
628               PetscInt data;
629               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
630               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
631             }
632   #else
633             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
634   #endif
635 #endif
636 #if defined(PETSC_USE_CTABLE)
637             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
638             col = (col - 1) / bs;
639 #else
640             col = (baij->colmap[in[j]] - 1) / bs;
641 #endif
642             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
643               PetscCall(MatDisAssemble_MPISBAIJ(mat));
644               col = in[j];
645             }
646           } else col = in[j];
647           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
648         }
649       }
650     } else {
651       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
652       if (!baij->donotstash) {
653         if (roworiented) {
654           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
655         } else {
656           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         }
658       }
659     }
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
665 {
666   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
667   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
668   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
669 
670   PetscFunctionBegin;
671   for (i = 0; i < m; i++) {
672     if (idxm[i] < 0) continue; /* negative row */
673     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
674     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
675       row = idxm[i] - bsrstart;
676       for (j = 0; j < n; j++) {
677         if (idxn[j] < 0) continue; /* negative column */
678         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
679         if (idxn[j] >= bscstart && idxn[j] < bscend) {
680           col = idxn[j] - bscstart;
681           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
682         } else {
683           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
684 #if defined(PETSC_USE_CTABLE)
685           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
686           data--;
687 #else
688           data = baij->colmap[idxn[j] / bs] - 1;
689 #endif
690           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
691           else {
692             col = data + idxn[j] % bs;
693             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
694           }
695         }
696       }
697     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
698   }
699   PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 
702 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
703 {
704   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
705   PetscReal     sum[2], *lnorm2;
706 
707   PetscFunctionBegin;
708   if (baij->size == 1) {
709     PetscCall(MatNorm(baij->A, type, norm));
710   } else {
711     if (type == NORM_FROBENIUS) {
712       PetscCall(PetscMalloc1(2, &lnorm2));
713       PetscCall(MatNorm(baij->A, type, lnorm2));
714       *lnorm2 = (*lnorm2) * (*lnorm2);
715       lnorm2++; /* squar power of norm(A) */
716       PetscCall(MatNorm(baij->B, type, lnorm2));
717       *lnorm2 = (*lnorm2) * (*lnorm2);
718       lnorm2--; /* squar power of norm(B) */
719       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
720       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
721       PetscCall(PetscFree(lnorm2));
722     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
723       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
724       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
725       PetscReal    *rsum, *rsum2, vabs;
726       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
727       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
728       MatScalar    *v;
729 
730       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
731       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
732       /* Amat */
733       v  = amat->a;
734       jj = amat->j;
735       for (brow = 0; brow < mbs; brow++) {
736         grow = bs * (rstart + brow);
737         nz   = amat->i[brow + 1] - amat->i[brow];
738         for (bcol = 0; bcol < nz; bcol++) {
739           gcol = bs * (rstart + *jj);
740           jj++;
741           for (col = 0; col < bs; col++) {
742             for (row = 0; row < bs; row++) {
743               vabs = PetscAbsScalar(*v);
744               v++;
745               rsum[gcol + col] += vabs;
746               /* non-diagonal block */
747               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
748             }
749           }
750         }
751         PetscCall(PetscLogFlops(nz * bs * bs));
752       }
753       /* Bmat */
754       v  = bmat->a;
755       jj = bmat->j;
756       for (brow = 0; brow < mbs; brow++) {
757         grow = bs * (rstart + brow);
758         nz   = bmat->i[brow + 1] - bmat->i[brow];
759         for (bcol = 0; bcol < nz; bcol++) {
760           gcol = bs * garray[*jj];
761           jj++;
762           for (col = 0; col < bs; col++) {
763             for (row = 0; row < bs; row++) {
764               vabs = PetscAbsScalar(*v);
765               v++;
766               rsum[gcol + col] += vabs;
767               rsum[grow + row] += vabs;
768             }
769           }
770         }
771         PetscCall(PetscLogFlops(nz * bs * bs));
772       }
773       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
774       *norm = 0.0;
775       for (col = 0; col < mat->cmap->N; col++) {
776         if (rsum2[col] > *norm) *norm = rsum2[col];
777       }
778       PetscCall(PetscFree2(rsum, rsum2));
779     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
780   }
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
785 {
786   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
787   PetscInt      nstash, reallocs;
788 
789   PetscFunctionBegin;
790   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
791 
792   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
793   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
794   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
795   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
802 {
803   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
804   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
805   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
806   PetscInt     *row, *col;
807   PetscBool     other_disassembled;
808   PetscMPIInt   n;
809   PetscBool     r1, r2, r3;
810   MatScalar    *val;
811 
812   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
813   PetscFunctionBegin;
814   if (!baij->donotstash && !mat->nooffprocentries) {
815     while (1) {
816       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
817       if (!flg) break;
818 
819       for (i = 0; i < n;) {
820         /* Now identify the consecutive vals belonging to the same row */
821         for (j = i, rstart = row[j]; j < n; j++) {
822           if (row[j] != rstart) break;
823         }
824         if (j < n) ncols = j - i;
825         else ncols = n - i;
826         /* Now assemble all these values with a single function call */
827         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
828         i = j;
829       }
830     }
831     PetscCall(MatStashScatterEnd_Private(&mat->stash));
832     /* Now process the block-stash. Since the values are stashed column-oriented,
833        set the row-oriented flag to column-oriented, and after MatSetValues()
834        restore the original flags */
835     r1 = baij->roworiented;
836     r2 = a->roworiented;
837     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
838 
839     baij->roworiented = PETSC_FALSE;
840     a->roworiented    = PETSC_FALSE;
841 
842     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
843     while (1) {
844       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
845       if (!flg) break;
846 
847       for (i = 0; i < n;) {
848         /* Now identify the consecutive vals belonging to the same row */
849         for (j = i, rstart = row[j]; j < n; j++) {
850           if (row[j] != rstart) break;
851         }
852         if (j < n) ncols = j - i;
853         else ncols = n - i;
854         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
855         i = j;
856       }
857     }
858     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
859 
860     baij->roworiented = r1;
861     a->roworiented    = r2;
862 
863     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
864   }
865 
866   PetscCall(MatAssemblyBegin(baij->A, mode));
867   PetscCall(MatAssemblyEnd(baij->A, mode));
868 
869   /* determine if any processor has disassembled, if so we must
870      also disassemble ourselves, in order that we may reassemble. */
871   /*
872      if nonzero structure of submatrix B cannot change then we know that
873      no processor disassembled thus we can skip this stuff
874   */
875   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
876     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
877     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
878   }
879 
880   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
881   PetscCall(MatAssemblyBegin(baij->B, mode));
882   PetscCall(MatAssemblyEnd(baij->B, mode));
883 
884   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
885 
886   baij->rowvalues = NULL;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
890     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
891     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
892   }
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
896 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
897 #include <petscdraw.h>
898 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
899 {
900   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
901   PetscInt          bs   = mat->rmap->bs;
902   PetscMPIInt       rank = baij->rank;
903   PetscBool         iascii, isdraw;
904   PetscViewer       sviewer;
905   PetscViewerFormat format;
906 
907   PetscFunctionBegin;
908   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
909   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
910   if (iascii) {
911     PetscCall(PetscViewerGetFormat(viewer, &format));
912     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
913       MatInfo info;
914       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
915       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
916       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
917       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
918                                                    mat->rmap->bs, (double)info.memory));
919       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
920       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
921       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(PetscViewerFlush(viewer));
924       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
925       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
926       PetscCall(VecScatterView(baij->Mvctx, viewer));
927       PetscFunctionReturn(PETSC_SUCCESS);
928     } else if (format == PETSC_VIEWER_ASCII_INFO) {
929       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
930       PetscFunctionReturn(PETSC_SUCCESS);
931     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
932       PetscFunctionReturn(PETSC_SUCCESS);
933     }
934   }
935 
936   if (isdraw) {
937     PetscDraw draw;
938     PetscBool isnull;
939     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
940     PetscCall(PetscDrawIsNull(draw, &isnull));
941     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
942   }
943 
944   {
945     /* assemble the entire matrix onto first processor. */
946     Mat           A;
947     Mat_SeqSBAIJ *Aloc;
948     Mat_SeqBAIJ  *Bloc;
949     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
950     MatScalar    *a;
951     const char   *matname;
952 
953     /* Should this be the same type as mat? */
954     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
955     if (rank == 0) {
956       PetscCall(MatSetSizes(A, M, N, M, N));
957     } else {
958       PetscCall(MatSetSizes(A, 0, 0, M, N));
959     }
960     PetscCall(MatSetType(A, MATMPISBAIJ));
961     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
962     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
963 
964     /* copy over the A part */
965     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
966     ai   = Aloc->i;
967     aj   = Aloc->j;
968     a    = Aloc->a;
969     PetscCall(PetscMalloc1(bs, &rvals));
970 
971     for (i = 0; i < mbs; i++) {
972       rvals[0] = bs * (baij->rstartbs + i);
973       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
974       for (j = ai[i]; j < ai[i + 1]; j++) {
975         col = (baij->cstartbs + aj[j]) * bs;
976         for (k = 0; k < bs; k++) {
977           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
978           col++;
979           a += bs;
980         }
981       }
982     }
983     /* copy over the B part */
984     Bloc = (Mat_SeqBAIJ *)baij->B->data;
985     ai   = Bloc->i;
986     aj   = Bloc->j;
987     a    = Bloc->a;
988     for (i = 0; i < mbs; i++) {
989       rvals[0] = bs * (baij->rstartbs + i);
990       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
991       for (j = ai[i]; j < ai[i + 1]; j++) {
992         col = baij->garray[aj[j]] * bs;
993         for (k = 0; k < bs; k++) {
994           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
995           col++;
996           a += bs;
997         }
998       }
999     }
1000     PetscCall(PetscFree(rvals));
1001     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1002     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1003     /*
1004        Everyone has to call to draw the matrix since the graphics waits are
1005        synchronized across all processors that share the PetscDraw object
1006     */
1007     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1008     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1009     if (rank == 0) {
1010       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
1011       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
1012     }
1013     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1014     PetscCall(MatDestroy(&A));
1015   }
1016   PetscFunctionReturn(PETSC_SUCCESS);
1017 }
1018 
1019 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1020 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1021 
1022 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1023 {
1024   PetscBool iascii, isdraw, issocket, isbinary;
1025 
1026   PetscFunctionBegin;
1027   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1031   if (iascii || isdraw || issocket) {
1032     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1033   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1038 {
1039   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1040   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1041   PetscScalar       *from;
1042   const PetscScalar *x;
1043 
1044   PetscFunctionBegin;
1045   /* diagonal part */
1046   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1047   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1048   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1049   PetscCall(VecZeroEntries(a->slvec1b));
1050 
1051   /* subdiagonal part */
1052   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1053   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1054 
1055   /* copy x into the vec slvec0 */
1056   PetscCall(VecGetArray(a->slvec0, &from));
1057   PetscCall(VecGetArrayRead(xx, &x));
1058 
1059   PetscCall(PetscArraycpy(from, x, bs * mbs));
1060   PetscCall(VecRestoreArray(a->slvec0, &from));
1061   PetscCall(VecRestoreArrayRead(xx, &x));
1062 
1063   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1064   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   /* supperdiagonal part */
1066   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1073   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1074   PetscScalar       *from;
1075   const PetscScalar *x;
1076 
1077   PetscFunctionBegin;
1078   /* diagonal part */
1079   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1080   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1081   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1082   PetscCall(VecZeroEntries(a->slvec1b));
1083 
1084   /* subdiagonal part */
1085   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1086 
1087   /* copy x into the vec slvec0 */
1088   PetscCall(VecGetArray(a->slvec0, &from));
1089   PetscCall(VecGetArrayRead(xx, &x));
1090 
1091   PetscCall(PetscArraycpy(from, x, bs * mbs));
1092   PetscCall(VecRestoreArray(a->slvec0, &from));
1093   PetscCall(VecRestoreArrayRead(xx, &x));
1094 
1095   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1097   /* supperdiagonal part */
1098   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1099   PetscFunctionReturn(PETSC_SUCCESS);
1100 }
1101 
1102 #if PetscDefined(USE_COMPLEX)
1103 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1104 {
1105   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1106   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1107   PetscScalar       *from;
1108   const PetscScalar *x;
1109 
1110   PetscFunctionBegin;
1111   /* diagonal part */
1112   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1113   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1114   PetscCall(VecZeroEntries(a->slvec1b));
1115 
1116   /* subdiagonal part */
1117   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1118   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1119 
1120   /* copy x into the vec slvec0 */
1121   PetscCall(VecGetArray(a->slvec0, &from));
1122   PetscCall(VecGetArrayRead(xx, &x));
1123   PetscCall(PetscArraycpy(from, x, bs * mbs));
1124   PetscCall(VecRestoreArray(a->slvec0, &from));
1125 
1126   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1127   PetscCall(VecRestoreArrayRead(xx, &x));
1128   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1129 
1130   /* supperdiagonal part */
1131   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 #endif
1135 
1136 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1137 {
1138   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1139   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1140   PetscScalar       *from;
1141   const PetscScalar *x;
1142 
1143   PetscFunctionBegin;
1144   /* diagonal part */
1145   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1146   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1147   PetscCall(VecZeroEntries(a->slvec1b));
1148 
1149   /* subdiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1151 
1152   /* copy x into the vec slvec0 */
1153   PetscCall(VecGetArray(a->slvec0, &from));
1154   PetscCall(VecGetArrayRead(xx, &x));
1155   PetscCall(PetscArraycpy(from, x, bs * mbs));
1156   PetscCall(VecRestoreArray(a->slvec0, &from));
1157 
1158   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1159   PetscCall(VecRestoreArrayRead(xx, &x));
1160   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1161 
1162   /* supperdiagonal part */
1163   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 /*
1168   This only works correctly for square matrices where the subblock A->A is the
1169    diagonal block
1170 */
1171 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1172 {
1173   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1174 
1175   PetscFunctionBegin;
1176   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1177   PetscCall(MatGetDiagonal(a->A, v));
1178   PetscFunctionReturn(PETSC_SUCCESS);
1179 }
1180 
1181 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1182 {
1183   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(MatScale(a->A, aa));
1187   PetscCall(MatScale(a->B, aa));
1188   PetscFunctionReturn(PETSC_SUCCESS);
1189 }
1190 
1191 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1192 {
1193   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1194   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1195   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1196   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1197   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1198 
1199   PetscFunctionBegin;
1200   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1201   mat->getrowactive = PETSC_TRUE;
1202 
1203   if (!mat->rowvalues && (idx || v)) {
1204     /*
1205         allocate enough space to hold information from the longest row.
1206     */
1207     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1208     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1209     PetscInt      max = 1, mbs = mat->mbs, tmp;
1210     for (i = 0; i < mbs; i++) {
1211       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1212       if (max < tmp) max = tmp;
1213     }
1214     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1215   }
1216 
1217   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1218   lrow = row - brstart; /* local row index */
1219 
1220   pvA = &vworkA;
1221   pcA = &cworkA;
1222   pvB = &vworkB;
1223   pcB = &cworkB;
1224   if (!v) {
1225     pvA = NULL;
1226     pvB = NULL;
1227   }
1228   if (!idx) {
1229     pcA = NULL;
1230     if (!v) pcB = NULL;
1231   }
1232   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1233   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1234   nztot = nzA + nzB;
1235 
1236   cmap = mat->garray;
1237   if (v || idx) {
1238     if (nztot) {
1239       /* Sort by increasing column numbers, assuming A and B already sorted */
1240       PetscInt imark = -1;
1241       if (v) {
1242         *v = v_p = mat->rowvalues;
1243         for (i = 0; i < nzB; i++) {
1244           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1245           else break;
1246         }
1247         imark = i;
1248         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1249         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1250       }
1251       if (idx) {
1252         *idx = idx_p = mat->rowindices;
1253         if (imark > -1) {
1254           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1255         } else {
1256           for (i = 0; i < nzB; i++) {
1257             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1258             else break;
1259           }
1260           imark = i;
1261         }
1262         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1263         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1264       }
1265     } else {
1266       if (idx) *idx = NULL;
1267       if (v) *v = NULL;
1268     }
1269   }
1270   *nz = nztot;
1271   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1272   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1273   PetscFunctionReturn(PETSC_SUCCESS);
1274 }
1275 
1276 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1277 {
1278   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1279 
1280   PetscFunctionBegin;
1281   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1282   baij->getrowactive = PETSC_FALSE;
1283   PetscFunctionReturn(PETSC_SUCCESS);
1284 }
1285 
1286 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1287 {
1288   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1289   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1290 
1291   PetscFunctionBegin;
1292   aA->getrow_utriangular = PETSC_TRUE;
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1296 {
1297   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1298   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1299 
1300   PetscFunctionBegin;
1301   aA->getrow_utriangular = PETSC_FALSE;
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1306 {
1307   PetscFunctionBegin;
1308   if (PetscDefined(USE_COMPLEX)) {
1309     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1310 
1311     PetscCall(MatConjugate(a->A));
1312     PetscCall(MatConjugate(a->B));
1313   }
1314   PetscFunctionReturn(PETSC_SUCCESS);
1315 }
1316 
1317 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1318 {
1319   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1320 
1321   PetscFunctionBegin;
1322   PetscCall(MatRealPart(a->A));
1323   PetscCall(MatRealPart(a->B));
1324   PetscFunctionReturn(PETSC_SUCCESS);
1325 }
1326 
1327 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1328 {
1329   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1330 
1331   PetscFunctionBegin;
1332   PetscCall(MatImaginaryPart(a->A));
1333   PetscCall(MatImaginaryPart(a->B));
1334   PetscFunctionReturn(PETSC_SUCCESS);
1335 }
1336 
1337 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1338    Input: isrow       - distributed(parallel),
1339           iscol_local - locally owned (seq)
1340 */
1341 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1342 {
1343   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1344   const PetscInt *ptr1, *ptr2;
1345 
1346   PetscFunctionBegin;
1347   *flg = PETSC_FALSE;
1348   PetscCall(ISGetLocalSize(isrow, &sz1));
1349   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1350   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1351 
1352   PetscCall(ISGetIndices(isrow, &ptr1));
1353   PetscCall(ISGetIndices(iscol_local, &ptr2));
1354 
1355   PetscCall(PetscMalloc1(sz1, &a1));
1356   PetscCall(PetscMalloc1(sz2, &a2));
1357   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1358   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1359   PetscCall(PetscSortInt(sz1, a1));
1360   PetscCall(PetscSortInt(sz2, a2));
1361 
1362   nmatch = 0;
1363   k      = 0;
1364   for (i = 0; i < sz1; i++) {
1365     for (j = k; j < sz2; j++) {
1366       if (a1[i] == a2[j]) {
1367         k = j;
1368         nmatch++;
1369         break;
1370       }
1371     }
1372   }
1373   PetscCall(ISRestoreIndices(isrow, &ptr1));
1374   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1375   PetscCall(PetscFree(a1));
1376   PetscCall(PetscFree(a2));
1377   if (nmatch < sz1) {
1378     *flg = PETSC_FALSE;
1379   } else {
1380     *flg = PETSC_TRUE;
1381   }
1382   PetscFunctionReturn(PETSC_SUCCESS);
1383 }
1384 
1385 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1386 {
1387   Mat       C[2];
1388   IS        iscol_local, isrow_local;
1389   PetscInt  csize, csize_local, rsize;
1390   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1391 
1392   PetscFunctionBegin;
1393   PetscCall(ISGetLocalSize(iscol, &csize));
1394   PetscCall(ISGetLocalSize(isrow, &rsize));
1395   if (call == MAT_REUSE_MATRIX) {
1396     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1397     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1398   } else {
1399     PetscCall(ISAllGather(iscol, &iscol_local));
1400     PetscCall(ISSorted(iscol_local, &issorted));
1401     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1402   }
1403   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1404   if (!isequal) {
1405     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1406     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1407     if (!isidentity) {
1408       if (call == MAT_REUSE_MATRIX) {
1409         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1410         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1411       } else {
1412         PetscCall(ISAllGather(isrow, &isrow_local));
1413         PetscCall(ISSorted(isrow_local, &issorted));
1414         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1415       }
1416     }
1417   }
1418   /* now call MatCreateSubMatrix_MPIBAIJ() */
1419   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1420   if (!isequal && !isidentity) {
1421     if (call == MAT_INITIAL_MATRIX) {
1422       IS       intersect;
1423       PetscInt ni;
1424 
1425       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1426       PetscCall(ISGetLocalSize(intersect, &ni));
1427       PetscCall(ISDestroy(&intersect));
1428       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1429     }
1430     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1431     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1432     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1433     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1434     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1435     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1436     PetscCall(MatDestroy(C));
1437     PetscCall(MatDestroy(C + 1));
1438   }
1439   if (call == MAT_INITIAL_MATRIX) {
1440     if (!isequal && !isidentity) {
1441       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1442       PetscCall(ISDestroy(&isrow_local));
1443     }
1444     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1445     PetscCall(ISDestroy(&iscol_local));
1446   }
1447   PetscFunctionReturn(PETSC_SUCCESS);
1448 }
1449 
1450 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1451 {
1452   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1453 
1454   PetscFunctionBegin;
1455   PetscCall(MatZeroEntries(l->A));
1456   PetscCall(MatZeroEntries(l->B));
1457   PetscFunctionReturn(PETSC_SUCCESS);
1458 }
1459 
1460 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1461 {
1462   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1463   Mat            A = a->A, B = a->B;
1464   PetscLogDouble isend[5], irecv[5];
1465 
1466   PetscFunctionBegin;
1467   info->block_size = (PetscReal)matin->rmap->bs;
1468 
1469   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1470 
1471   isend[0] = info->nz_used;
1472   isend[1] = info->nz_allocated;
1473   isend[2] = info->nz_unneeded;
1474   isend[3] = info->memory;
1475   isend[4] = info->mallocs;
1476 
1477   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1478 
1479   isend[0] += info->nz_used;
1480   isend[1] += info->nz_allocated;
1481   isend[2] += info->nz_unneeded;
1482   isend[3] += info->memory;
1483   isend[4] += info->mallocs;
1484   if (flag == MAT_LOCAL) {
1485     info->nz_used      = isend[0];
1486     info->nz_allocated = isend[1];
1487     info->nz_unneeded  = isend[2];
1488     info->memory       = isend[3];
1489     info->mallocs      = isend[4];
1490   } else if (flag == MAT_GLOBAL_MAX) {
1491     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1492 
1493     info->nz_used      = irecv[0];
1494     info->nz_allocated = irecv[1];
1495     info->nz_unneeded  = irecv[2];
1496     info->memory       = irecv[3];
1497     info->mallocs      = irecv[4];
1498   } else if (flag == MAT_GLOBAL_SUM) {
1499     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1500 
1501     info->nz_used      = irecv[0];
1502     info->nz_allocated = irecv[1];
1503     info->nz_unneeded  = irecv[2];
1504     info->memory       = irecv[3];
1505     info->mallocs      = irecv[4];
1506   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1507   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1508   info->fill_ratio_needed = 0;
1509   info->factor_mallocs    = 0;
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 }
1512 
1513 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1514 {
1515   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1516   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1517 
1518   PetscFunctionBegin;
1519   switch (op) {
1520   case MAT_NEW_NONZERO_LOCATIONS:
1521   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1522   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1523   case MAT_KEEP_NONZERO_PATTERN:
1524   case MAT_SUBMAT_SINGLEIS:
1525   case MAT_NEW_NONZERO_LOCATION_ERR:
1526     MatCheckPreallocated(A, 1);
1527     PetscCall(MatSetOption(a->A, op, flg));
1528     PetscCall(MatSetOption(a->B, op, flg));
1529     break;
1530   case MAT_ROW_ORIENTED:
1531     MatCheckPreallocated(A, 1);
1532     a->roworiented = flg;
1533 
1534     PetscCall(MatSetOption(a->A, op, flg));
1535     PetscCall(MatSetOption(a->B, op, flg));
1536     break;
1537   case MAT_FORCE_DIAGONAL_ENTRIES:
1538   case MAT_SORTED_FULL:
1539     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1540     break;
1541   case MAT_IGNORE_OFF_PROC_ENTRIES:
1542     a->donotstash = flg;
1543     break;
1544   case MAT_USE_HASH_TABLE:
1545     a->ht_flag = flg;
1546     break;
1547   case MAT_HERMITIAN:
1548     MatCheckPreallocated(A, 1);
1549     PetscCall(MatSetOption(a->A, op, flg));
1550 #if defined(PETSC_USE_COMPLEX)
1551     if (flg) { /* need different mat-vec ops */
1552       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1553       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1554       A->ops->multtranspose    = NULL;
1555       A->ops->multtransposeadd = NULL;
1556       A->symmetric             = PETSC_BOOL3_FALSE;
1557     }
1558 #endif
1559     break;
1560   case MAT_SPD:
1561   case MAT_SYMMETRIC:
1562     MatCheckPreallocated(A, 1);
1563     PetscCall(MatSetOption(a->A, op, flg));
1564 #if defined(PETSC_USE_COMPLEX)
1565     if (flg) { /* restore to use default mat-vec ops */
1566       A->ops->mult             = MatMult_MPISBAIJ;
1567       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1568       A->ops->multtranspose    = MatMult_MPISBAIJ;
1569       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1570     }
1571 #endif
1572     break;
1573   case MAT_STRUCTURALLY_SYMMETRIC:
1574     MatCheckPreallocated(A, 1);
1575     PetscCall(MatSetOption(a->A, op, flg));
1576     break;
1577   case MAT_SYMMETRY_ETERNAL:
1578   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1579     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1580     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1581     break;
1582   case MAT_SPD_ETERNAL:
1583     break;
1584   case MAT_IGNORE_LOWER_TRIANGULAR:
1585     aA->ignore_ltriangular = flg;
1586     break;
1587   case MAT_ERROR_LOWER_TRIANGULAR:
1588     aA->ignore_ltriangular = flg;
1589     break;
1590   case MAT_GETROW_UPPERTRIANGULAR:
1591     aA->getrow_utriangular = flg;
1592     break;
1593   default:
1594     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1595   }
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1600 {
1601   PetscFunctionBegin;
1602   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1603   if (reuse == MAT_INITIAL_MATRIX) {
1604     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1605   } else if (reuse == MAT_REUSE_MATRIX) {
1606     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1607   }
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1612 {
1613   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1614   Mat           a = baij->A, b = baij->B;
1615   PetscInt      nv, m, n;
1616   PetscBool     flg;
1617 
1618   PetscFunctionBegin;
1619   if (ll != rr) {
1620     PetscCall(VecEqual(ll, rr, &flg));
1621     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1622   }
1623   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1624 
1625   PetscCall(MatGetLocalSize(mat, &m, &n));
1626   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1627 
1628   PetscCall(VecGetLocalSize(rr, &nv));
1629   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1630 
1631   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1632 
1633   /* left diagonalscale the off-diagonal part */
1634   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1635 
1636   /* scale the diagonal part */
1637   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1638 
1639   /* right diagonalscale the off-diagonal part */
1640   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1641   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1646 {
1647   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1648 
1649   PetscFunctionBegin;
1650   PetscCall(MatSetUnfactored(a->A));
1651   PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653 
1654 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1655 
1656 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1657 {
1658   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1659   Mat           a, b, c, d;
1660   PetscBool     flg;
1661 
1662   PetscFunctionBegin;
1663   a = matA->A;
1664   b = matA->B;
1665   c = matB->A;
1666   d = matB->B;
1667 
1668   PetscCall(MatEqual(a, c, &flg));
1669   if (flg) PetscCall(MatEqual(b, d, &flg));
1670   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1671   PetscFunctionReturn(PETSC_SUCCESS);
1672 }
1673 
1674 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1675 {
1676   PetscBool isbaij;
1677 
1678   PetscFunctionBegin;
1679   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1680   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1681   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1682   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1683     PetscCall(MatGetRowUpperTriangular(A));
1684     PetscCall(MatCopy_Basic(A, B, str));
1685     PetscCall(MatRestoreRowUpperTriangular(A));
1686   } else {
1687     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1688     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1689 
1690     PetscCall(MatCopy(a->A, b->A, str));
1691     PetscCall(MatCopy(a->B, b->B, str));
1692   }
1693   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1694   PetscFunctionReturn(PETSC_SUCCESS);
1695 }
1696 
1697 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1698 {
1699   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1700   PetscBLASInt  bnz, one                          = 1;
1701   Mat_SeqSBAIJ *xa, *ya;
1702   Mat_SeqBAIJ  *xb, *yb;
1703 
1704   PetscFunctionBegin;
1705   if (str == SAME_NONZERO_PATTERN) {
1706     PetscScalar alpha = a;
1707     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1708     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1709     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1710     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1711     xb = (Mat_SeqBAIJ *)xx->B->data;
1712     yb = (Mat_SeqBAIJ *)yy->B->data;
1713     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1714     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1715     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1716   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1717     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1718     PetscCall(MatAXPY_Basic(Y, a, X, str));
1719     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1720   } else {
1721     Mat       B;
1722     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1723     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1724     PetscCall(MatGetRowUpperTriangular(X));
1725     PetscCall(MatGetRowUpperTriangular(Y));
1726     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1727     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1728     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1729     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1730     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1731     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1732     PetscCall(MatSetType(B, MATMPISBAIJ));
1733     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1734     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1735     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1736     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1737     PetscCall(MatHeaderMerge(Y, &B));
1738     PetscCall(PetscFree(nnz_d));
1739     PetscCall(PetscFree(nnz_o));
1740     PetscCall(MatRestoreRowUpperTriangular(X));
1741     PetscCall(MatRestoreRowUpperTriangular(Y));
1742   }
1743   PetscFunctionReturn(PETSC_SUCCESS);
1744 }
1745 
1746 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1747 {
1748   PetscInt  i;
1749   PetscBool flg;
1750 
1751   PetscFunctionBegin;
1752   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1753   for (i = 0; i < n; i++) {
1754     PetscCall(ISEqual(irow[i], icol[i], &flg));
1755     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1756   }
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1761 {
1762   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1763   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1764 
1765   PetscFunctionBegin;
1766   if (!Y->preallocated) {
1767     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1768   } else if (!aij->nz) {
1769     PetscInt nonew = aij->nonew;
1770     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1771     aij->nonew = nonew;
1772   }
1773   PetscCall(MatShift_Basic(Y, a));
1774   PetscFunctionReturn(PETSC_SUCCESS);
1775 }
1776 
1777 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1778 {
1779   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1780 
1781   PetscFunctionBegin;
1782   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1783   PetscCall(MatMissingDiagonal(a->A, missing, d));
1784   if (d) {
1785     PetscInt rstart;
1786     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1787     *d += rstart / A->rmap->bs;
1788   }
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 
1792 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1793 {
1794   PetscFunctionBegin;
1795   *a = ((Mat_MPISBAIJ *)A->data)->A;
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1800 {
1801   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1805   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1810 
1811 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1812                                        MatGetRow_MPISBAIJ,
1813                                        MatRestoreRow_MPISBAIJ,
1814                                        MatMult_MPISBAIJ,
1815                                        /*  4*/ MatMultAdd_MPISBAIJ,
1816                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1817                                        MatMultAdd_MPISBAIJ,
1818                                        NULL,
1819                                        NULL,
1820                                        NULL,
1821                                        /* 10*/ NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        MatSOR_MPISBAIJ,
1825                                        MatTranspose_MPISBAIJ,
1826                                        /* 15*/ MatGetInfo_MPISBAIJ,
1827                                        MatEqual_MPISBAIJ,
1828                                        MatGetDiagonal_MPISBAIJ,
1829                                        MatDiagonalScale_MPISBAIJ,
1830                                        MatNorm_MPISBAIJ,
1831                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1832                                        MatAssemblyEnd_MPISBAIJ,
1833                                        MatSetOption_MPISBAIJ,
1834                                        MatZeroEntries_MPISBAIJ,
1835                                        /* 24*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /* 29*/ MatSetUp_MPI_Hash,
1841                                        NULL,
1842                                        NULL,
1843                                        MatGetDiagonalBlock_MPISBAIJ,
1844                                        NULL,
1845                                        /* 34*/ MatDuplicate_MPISBAIJ,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 39*/ MatAXPY_MPISBAIJ,
1851                                        MatCreateSubMatrices_MPISBAIJ,
1852                                        MatIncreaseOverlap_MPISBAIJ,
1853                                        MatGetValues_MPISBAIJ,
1854                                        MatCopy_MPISBAIJ,
1855                                        /* 44*/ NULL,
1856                                        MatScale_MPISBAIJ,
1857                                        MatShift_MPISBAIJ,
1858                                        NULL,
1859                                        NULL,
1860                                        /* 49*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        /* 54*/ NULL,
1866                                        NULL,
1867                                        MatSetUnfactored_MPISBAIJ,
1868                                        NULL,
1869                                        MatSetValuesBlocked_MPISBAIJ,
1870                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        /* 64*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1881                                        NULL,
1882                                        MatConvert_MPISBAIJ_Basic,
1883                                        NULL,
1884                                        NULL,
1885                                        /* 74*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        /* 79*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        MatLoad_MPISBAIJ,
1895                                        /* 84*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /* 89*/ NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        /* 94*/ NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        /* 99*/ NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        MatConjugate_MPISBAIJ,
1914                                        NULL,
1915                                        /*104*/ NULL,
1916                                        MatRealPart_MPISBAIJ,
1917                                        MatImaginaryPart_MPISBAIJ,
1918                                        MatGetRowUpperTriangular_MPISBAIJ,
1919                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1920                                        /*109*/ NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        MatMissingDiagonal_MPISBAIJ,
1925                                        /*114*/ NULL,
1926                                        NULL,
1927                                        NULL,
1928                                        NULL,
1929                                        NULL,
1930                                        /*119*/ NULL,
1931                                        NULL,
1932                                        NULL,
1933                                        NULL,
1934                                        NULL,
1935                                        /*124*/ NULL,
1936                                        NULL,
1937                                        NULL,
1938                                        NULL,
1939                                        NULL,
1940                                        /*129*/ NULL,
1941                                        NULL,
1942                                        NULL,
1943                                        NULL,
1944                                        NULL,
1945                                        /*134*/ NULL,
1946                                        NULL,
1947                                        NULL,
1948                                        NULL,
1949                                        NULL,
1950                                        /*139*/ MatSetBlockSizes_Default,
1951                                        NULL,
1952                                        NULL,
1953                                        NULL,
1954                                        NULL,
1955                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1956                                        NULL,
1957                                        NULL,
1958                                        NULL,
1959                                        NULL,
1960                                        NULL,
1961                                        /*150*/ NULL,
1962                                        MatEliminateZeros_MPISBAIJ,
1963                                        NULL};
1964 
1965 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1966 {
1967   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1968   PetscInt      i, mbs, Mbs;
1969   PetscMPIInt   size;
1970 
1971   PetscFunctionBegin;
1972   if (B->hash_active) {
1973     B->ops[0]      = b->cops;
1974     B->hash_active = PETSC_FALSE;
1975   }
1976   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1977   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1978   PetscCall(PetscLayoutSetUp(B->rmap));
1979   PetscCall(PetscLayoutSetUp(B->cmap));
1980   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1981   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1982   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1983 
1984   mbs = B->rmap->n / bs;
1985   Mbs = B->rmap->N / bs;
1986   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1987 
1988   B->rmap->bs = bs;
1989   b->bs2      = bs * bs;
1990   b->mbs      = mbs;
1991   b->Mbs      = Mbs;
1992   b->nbs      = B->cmap->n / bs;
1993   b->Nbs      = B->cmap->N / bs;
1994 
1995   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1996   b->rstartbs = B->rmap->rstart / bs;
1997   b->rendbs   = B->rmap->rend / bs;
1998 
1999   b->cstartbs = B->cmap->rstart / bs;
2000   b->cendbs   = B->cmap->rend / bs;
2001 
2002 #if defined(PETSC_USE_CTABLE)
2003   PetscCall(PetscHMapIDestroy(&b->colmap));
2004 #else
2005   PetscCall(PetscFree(b->colmap));
2006 #endif
2007   PetscCall(PetscFree(b->garray));
2008   PetscCall(VecDestroy(&b->lvec));
2009   PetscCall(VecScatterDestroy(&b->Mvctx));
2010   PetscCall(VecDestroy(&b->slvec0));
2011   PetscCall(VecDestroy(&b->slvec0b));
2012   PetscCall(VecDestroy(&b->slvec1));
2013   PetscCall(VecDestroy(&b->slvec1a));
2014   PetscCall(VecDestroy(&b->slvec1b));
2015   PetscCall(VecScatterDestroy(&b->sMvctx));
2016 
2017   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2018 
2019   MatSeqXAIJGetOptions_Private(b->B);
2020   PetscCall(MatDestroy(&b->B));
2021   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2022   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2023   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2024   MatSeqXAIJRestoreOptions_Private(b->B);
2025 
2026   MatSeqXAIJGetOptions_Private(b->A);
2027   PetscCall(MatDestroy(&b->A));
2028   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2029   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2030   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
2031   MatSeqXAIJRestoreOptions_Private(b->A);
2032 
2033   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2034   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2035 
2036   B->preallocated  = PETSC_TRUE;
2037   B->was_assembled = PETSC_FALSE;
2038   B->assembled     = PETSC_FALSE;
2039   PetscFunctionReturn(PETSC_SUCCESS);
2040 }
2041 
2042 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2043 {
2044   PetscInt        m, rstart, cend;
2045   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2046   const PetscInt *JJ          = NULL;
2047   PetscScalar    *values      = NULL;
2048   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2049   PetscBool       nooffprocentries;
2050 
2051   PetscFunctionBegin;
2052   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2053   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2054   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2055   PetscCall(PetscLayoutSetUp(B->rmap));
2056   PetscCall(PetscLayoutSetUp(B->cmap));
2057   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2058   m      = B->rmap->n / bs;
2059   rstart = B->rmap->rstart / bs;
2060   cend   = B->cmap->rend / bs;
2061 
2062   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2063   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2064   for (i = 0; i < m; i++) {
2065     nz = ii[i + 1] - ii[i];
2066     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2067     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2068     JJ = jj + ii[i];
2069     bd = 0;
2070     for (j = 0; j < nz; j++) {
2071       if (*JJ >= i + rstart) break;
2072       JJ++;
2073       bd++;
2074     }
2075     d = 0;
2076     for (; j < nz; j++) {
2077       if (*JJ++ >= cend) break;
2078       d++;
2079     }
2080     d_nnz[i] = d;
2081     o_nnz[i] = nz - d - bd;
2082     nz       = nz - bd;
2083     nz_max   = PetscMax(nz_max, nz);
2084   }
2085   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2086   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2087   PetscCall(PetscFree2(d_nnz, o_nnz));
2088 
2089   values = (PetscScalar *)V;
2090   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2091   for (i = 0; i < m; i++) {
2092     PetscInt        row   = i + rstart;
2093     PetscInt        ncols = ii[i + 1] - ii[i];
2094     const PetscInt *icols = jj + ii[i];
2095     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2096       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2097       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2098     } else { /* block ordering does not match so we can only insert one block at a time. */
2099       PetscInt j;
2100       for (j = 0; j < ncols; j++) {
2101         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2102         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2103       }
2104     }
2105   }
2106 
2107   if (!V) PetscCall(PetscFree(values));
2108   nooffprocentries    = B->nooffprocentries;
2109   B->nooffprocentries = PETSC_TRUE;
2110   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2111   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2112   B->nooffprocentries = nooffprocentries;
2113 
2114   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2115   PetscFunctionReturn(PETSC_SUCCESS);
2116 }
2117 
2118 /*MC
2119    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2120    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2121    the matrix is stored.
2122 
2123    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2124    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2125 
2126    Options Database Key:
2127 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2128 
2129    Level: beginner
2130 
2131    Note:
2132      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2133      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2134 
2135 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2136 M*/
2137 
2138 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2139 {
2140   Mat_MPISBAIJ *b;
2141   PetscBool     flg = PETSC_FALSE;
2142 
2143   PetscFunctionBegin;
2144   PetscCall(PetscNew(&b));
2145   B->data   = (void *)b;
2146   B->ops[0] = MatOps_Values;
2147 
2148   B->ops->destroy = MatDestroy_MPISBAIJ;
2149   B->ops->view    = MatView_MPISBAIJ;
2150   B->assembled    = PETSC_FALSE;
2151   B->insertmode   = NOT_SET_VALUES;
2152 
2153   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2154   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2155 
2156   /* build local table of row and column ownerships */
2157   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2158 
2159   /* build cache for off array entries formed */
2160   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2161 
2162   b->donotstash  = PETSC_FALSE;
2163   b->colmap      = NULL;
2164   b->garray      = NULL;
2165   b->roworiented = PETSC_TRUE;
2166 
2167   /* stuff used in block assembly */
2168   b->barray = NULL;
2169 
2170   /* stuff used for matrix vector multiply */
2171   b->lvec    = NULL;
2172   b->Mvctx   = NULL;
2173   b->slvec0  = NULL;
2174   b->slvec0b = NULL;
2175   b->slvec1  = NULL;
2176   b->slvec1a = NULL;
2177   b->slvec1b = NULL;
2178   b->sMvctx  = NULL;
2179 
2180   /* stuff for MatGetRow() */
2181   b->rowindices   = NULL;
2182   b->rowvalues    = NULL;
2183   b->getrowactive = PETSC_FALSE;
2184 
2185   /* hash table stuff */
2186   b->ht           = NULL;
2187   b->hd           = NULL;
2188   b->ht_size      = 0;
2189   b->ht_flag      = PETSC_FALSE;
2190   b->ht_fact      = 0;
2191   b->ht_total_ct  = 0;
2192   b->ht_insert_ct = 0;
2193 
2194   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2195   b->ijonly = PETSC_FALSE;
2196 
2197   b->in_loc = NULL;
2198   b->v_loc  = NULL;
2199   b->n_loc  = 0;
2200 
2201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2205 #if defined(PETSC_HAVE_ELEMENTAL)
2206   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2207 #endif
2208 #if defined(PETSC_HAVE_SCALAPACK)
2209   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2210 #endif
2211   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2212   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2213 
2214   B->symmetric                   = PETSC_BOOL3_TRUE;
2215   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2216   B->symmetry_eternal            = PETSC_TRUE;
2217   B->structural_symmetry_eternal = PETSC_TRUE;
2218 #if defined(PETSC_USE_COMPLEX)
2219   B->hermitian = PETSC_BOOL3_FALSE;
2220 #else
2221   B->hermitian = PETSC_BOOL3_TRUE;
2222 #endif
2223 
2224   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2225   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2226   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2227   if (flg) {
2228     PetscReal fact = 1.39;
2229     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2230     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2231     if (fact <= 1.0) fact = 1.39;
2232     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2233     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2234   }
2235   PetscOptionsEnd();
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238 
2239 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2240 /*MC
2241    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2242 
2243    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2244    and `MATMPISBAIJ` otherwise.
2245 
2246    Options Database Key:
2247 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2248 
2249   Level: beginner
2250 
2251 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2252 M*/
2253 
2254 /*@C
2255   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2256   the user should preallocate the matrix storage by setting the parameters
2257   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2258   performance can be increased by more than a factor of 50.
2259 
2260   Collective
2261 
2262   Input Parameters:
2263 + B     - the matrix
2264 . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2265           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2266 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2267            submatrix  (same for all local rows)
2268 . d_nnz - array containing the number of block nonzeros in the various block rows
2269            in the upper triangular and diagonal part of the in diagonal portion of the local
2270            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2271            for the diagonal entry and set a value even if it is zero.
2272 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2273            submatrix (same for all local rows).
2274 - o_nnz - array containing the number of nonzeros in the various block rows of the
2275            off-diagonal portion of the local submatrix that is right of the diagonal
2276            (possibly different for each block row) or `NULL`.
2277 
2278   Options Database Keys:
2279 + -mat_no_unroll  - uses code that does not unroll the loops in the
2280                      block calculations (much slower)
2281 - -mat_block_size - size of the blocks to use
2282 
2283   Level: intermediate
2284 
2285   Notes:
2286 
2287   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2288   than it must be used on all processors that share the object for that argument.
2289 
2290   If the *_nnz parameter is given then the *_nz parameter is ignored
2291 
2292   Storage Information:
2293   For a square global matrix we define each processor's diagonal portion
2294   to be its local rows and the corresponding columns (a square submatrix);
2295   each processor's off-diagonal portion encompasses the remainder of the
2296   local matrix (a rectangular submatrix).
2297 
2298   The user can specify preallocated storage for the diagonal part of
2299   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2300   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2301   memory allocation.  Likewise, specify preallocated storage for the
2302   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2303 
2304   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2305   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2306   You can also run with the option `-info` and look for messages with the string
2307   malloc in them to see if additional memory allocation was needed.
2308 
2309   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2310   the figure below we depict these three local rows and all columns (0-11).
2311 
2312 .vb
2313            0 1 2 3 4 5 6 7 8 9 10 11
2314           --------------------------
2315    row 3  |. . . d d d o o o o  o  o
2316    row 4  |. . . d d d o o o o  o  o
2317    row 5  |. . . d d d o o o o  o  o
2318           --------------------------
2319 .ve
2320 
2321   Thus, any entries in the d locations are stored in the d (diagonal)
2322   submatrix, and any entries in the o locations are stored in the
2323   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2324   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2325 
2326   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2327   plus the diagonal part of the d matrix,
2328   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2329 
2330   In general, for PDE problems in which most nonzeros are near the diagonal,
2331   one expects `d_nz` >> `o_nz`.
2332 
2333 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2334 @*/
2335 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2336 {
2337   PetscFunctionBegin;
2338   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2339   PetscValidType(B, 1);
2340   PetscValidLogicalCollectiveInt(B, bs, 2);
2341   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2342   PetscFunctionReturn(PETSC_SUCCESS);
2343 }
2344 
2345 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2346 /*@C
2347   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2348   (block compressed row).  For good matrix assembly performance
2349   the user should preallocate the matrix storage by setting the parameters
2350   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2351 
2352   Collective
2353 
2354   Input Parameters:
2355 + comm  - MPI communicator
2356 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2357           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2358 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2359            This value should be the same as the local size used in creating the
2360            y vector for the matrix-vector product y = Ax.
2361 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2362            This value should be the same as the local size used in creating the
2363            x vector for the matrix-vector product y = Ax.
2364 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2365 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2366 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2367            submatrix (same for all local rows)
2368 . d_nnz - array containing the number of block nonzeros in the various block rows
2369            in the upper triangular portion of the in diagonal portion of the local
2370            (possibly different for each block block row) or `NULL`.
2371            If you plan to factor the matrix you must leave room for the diagonal entry and
2372            set its value even if it is zero.
2373 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2374            submatrix (same for all local rows).
2375 - o_nnz - array containing the number of nonzeros in the various block rows of the
2376            off-diagonal portion of the local submatrix (possibly different for
2377            each block row) or `NULL`.
2378 
2379   Output Parameter:
2380 . A - the matrix
2381 
2382   Options Database Keys:
2383 + -mat_no_unroll  - uses code that does not unroll the loops in the
2384                      block calculations (much slower)
2385 . -mat_block_size - size of the blocks to use
2386 - -mat_mpi        - use the parallel matrix data structures even on one processor
2387                (defaults to using SeqBAIJ format on one processor)
2388 
2389   Level: intermediate
2390 
2391   Notes:
2392   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2393   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2394   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2395 
2396   The number of rows and columns must be divisible by blocksize.
2397   This matrix type does not support complex Hermitian operation.
2398 
2399   The user MUST specify either the local or global matrix dimensions
2400   (possibly both).
2401 
2402   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2403   than it must be used on all processors that share the object for that argument.
2404 
2405   If the *_nnz parameter is given then the *_nz parameter is ignored
2406 
2407   Storage Information:
2408   For a square global matrix we define each processor's diagonal portion
2409   to be its local rows and the corresponding columns (a square submatrix);
2410   each processor's off-diagonal portion encompasses the remainder of the
2411   local matrix (a rectangular submatrix).
2412 
2413   The user can specify preallocated storage for the diagonal part of
2414   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2415   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2416   memory allocation. Likewise, specify preallocated storage for the
2417   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2418 
2419   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2420   the figure below we depict these three local rows and all columns (0-11).
2421 
2422 .vb
2423            0 1 2 3 4 5 6 7 8 9 10 11
2424           --------------------------
2425    row 3  |. . . d d d o o o o  o  o
2426    row 4  |. . . d d d o o o o  o  o
2427    row 5  |. . . d d d o o o o  o  o
2428           --------------------------
2429 .ve
2430 
2431   Thus, any entries in the d locations are stored in the d (diagonal)
2432   submatrix, and any entries in the o locations are stored in the
2433   o (off-diagonal) submatrix. Note that the d matrix is stored in
2434   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2435 
2436   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2437   plus the diagonal part of the d matrix,
2438   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2439   In general, for PDE problems in which most nonzeros are near the diagonal,
2440   one expects `d_nz` >> `o_nz`.
2441 
2442 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2443 @*/
2444 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2445 {
2446   PetscMPIInt size;
2447 
2448   PetscFunctionBegin;
2449   PetscCall(MatCreate(comm, A));
2450   PetscCall(MatSetSizes(*A, m, n, M, N));
2451   PetscCallMPI(MPI_Comm_size(comm, &size));
2452   if (size > 1) {
2453     PetscCall(MatSetType(*A, MATMPISBAIJ));
2454     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2455   } else {
2456     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2457     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2458   }
2459   PetscFunctionReturn(PETSC_SUCCESS);
2460 }
2461 
2462 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2463 {
2464   Mat           mat;
2465   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2466   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2467   PetscScalar  *array;
2468 
2469   PetscFunctionBegin;
2470   *newmat = NULL;
2471 
2472   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2473   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2474   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2475   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2476   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2477 
2478   if (matin->hash_active) {
2479     PetscCall(MatSetUp(mat));
2480   } else {
2481     mat->factortype   = matin->factortype;
2482     mat->preallocated = PETSC_TRUE;
2483     mat->assembled    = PETSC_TRUE;
2484     mat->insertmode   = NOT_SET_VALUES;
2485 
2486     a      = (Mat_MPISBAIJ *)mat->data;
2487     a->bs2 = oldmat->bs2;
2488     a->mbs = oldmat->mbs;
2489     a->nbs = oldmat->nbs;
2490     a->Mbs = oldmat->Mbs;
2491     a->Nbs = oldmat->Nbs;
2492 
2493     a->size         = oldmat->size;
2494     a->rank         = oldmat->rank;
2495     a->donotstash   = oldmat->donotstash;
2496     a->roworiented  = oldmat->roworiented;
2497     a->rowindices   = NULL;
2498     a->rowvalues    = NULL;
2499     a->getrowactive = PETSC_FALSE;
2500     a->barray       = NULL;
2501     a->rstartbs     = oldmat->rstartbs;
2502     a->rendbs       = oldmat->rendbs;
2503     a->cstartbs     = oldmat->cstartbs;
2504     a->cendbs       = oldmat->cendbs;
2505 
2506     /* hash table stuff */
2507     a->ht           = NULL;
2508     a->hd           = NULL;
2509     a->ht_size      = 0;
2510     a->ht_flag      = oldmat->ht_flag;
2511     a->ht_fact      = oldmat->ht_fact;
2512     a->ht_total_ct  = 0;
2513     a->ht_insert_ct = 0;
2514 
2515     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2516     if (oldmat->colmap) {
2517 #if defined(PETSC_USE_CTABLE)
2518       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2519 #else
2520       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2521       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2522 #endif
2523     } else a->colmap = NULL;
2524 
2525     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2526       PetscCall(PetscMalloc1(len, &a->garray));
2527       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2528     } else a->garray = NULL;
2529 
2530     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2531     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2532     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2533 
2534     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2535     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2536 
2537     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2538     PetscCall(VecGetArray(a->slvec1, &array));
2539     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2540     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2541     PetscCall(VecRestoreArray(a->slvec1, &array));
2542     PetscCall(VecGetArray(a->slvec0, &array));
2543     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2544     PetscCall(VecRestoreArray(a->slvec0, &array));
2545 
2546     /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2547     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2548     a->sMvctx = oldmat->sMvctx;
2549 
2550     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2551     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2552   }
2553   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2554   *newmat = mat;
2555   PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557 
2558 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2559 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2560 
2561 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2562 {
2563   PetscBool isbinary;
2564 
2565   PetscFunctionBegin;
2566   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2567   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2568   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2569   PetscFunctionReturn(PETSC_SUCCESS);
2570 }
2571 
2572 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2573 {
2574   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2575   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2576   PetscReal     atmp;
2577   PetscReal    *work, *svalues, *rvalues;
2578   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2579   PetscMPIInt   rank, size;
2580   PetscInt     *rowners_bs, dest, count, source;
2581   PetscScalar  *va;
2582   MatScalar    *ba;
2583   MPI_Status    stat;
2584 
2585   PetscFunctionBegin;
2586   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2587   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2588   PetscCall(VecGetArray(v, &va));
2589 
2590   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2591   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2592 
2593   bs  = A->rmap->bs;
2594   mbs = a->mbs;
2595   Mbs = a->Mbs;
2596   ba  = b->a;
2597   bi  = b->i;
2598   bj  = b->j;
2599 
2600   /* find ownerships */
2601   rowners_bs = A->rmap->range;
2602 
2603   /* each proc creates an array to be distributed */
2604   PetscCall(PetscCalloc1(bs * Mbs, &work));
2605 
2606   /* row_max for B */
2607   if (rank != size - 1) {
2608     for (i = 0; i < mbs; i++) {
2609       ncols = bi[1] - bi[0];
2610       bi++;
2611       brow = bs * i;
2612       for (j = 0; j < ncols; j++) {
2613         bcol = bs * (*bj);
2614         for (kcol = 0; kcol < bs; kcol++) {
2615           col = bcol + kcol;           /* local col index */
2616           col += rowners_bs[rank + 1]; /* global col index */
2617           for (krow = 0; krow < bs; krow++) {
2618             atmp = PetscAbsScalar(*ba);
2619             ba++;
2620             row = brow + krow; /* local row index */
2621             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2622             if (work[col] < atmp) work[col] = atmp;
2623           }
2624         }
2625         bj++;
2626       }
2627     }
2628 
2629     /* send values to its owners */
2630     for (dest = rank + 1; dest < size; dest++) {
2631       svalues = work + rowners_bs[dest];
2632       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2633       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2634     }
2635   }
2636 
2637   /* receive values */
2638   if (rank) {
2639     rvalues = work;
2640     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2641     for (source = 0; source < rank; source++) {
2642       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2643       /* process values */
2644       for (i = 0; i < count; i++) {
2645         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2646       }
2647     }
2648   }
2649 
2650   PetscCall(VecRestoreArray(v, &va));
2651   PetscCall(PetscFree(work));
2652   PetscFunctionReturn(PETSC_SUCCESS);
2653 }
2654 
2655 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2656 {
2657   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2658   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2659   PetscScalar       *x, *ptr, *from;
2660   Vec                bb1;
2661   const PetscScalar *b;
2662 
2663   PetscFunctionBegin;
2664   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2665   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2666 
2667   if (flag == SOR_APPLY_UPPER) {
2668     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2669     PetscFunctionReturn(PETSC_SUCCESS);
2670   }
2671 
2672   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2673     if (flag & SOR_ZERO_INITIAL_GUESS) {
2674       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2675       its--;
2676     }
2677 
2678     PetscCall(VecDuplicate(bb, &bb1));
2679     while (its--) {
2680       /* lower triangular part: slvec0b = - B^T*xx */
2681       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2682 
2683       /* copy xx into slvec0a */
2684       PetscCall(VecGetArray(mat->slvec0, &ptr));
2685       PetscCall(VecGetArray(xx, &x));
2686       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2687       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2688 
2689       PetscCall(VecScale(mat->slvec0, -1.0));
2690 
2691       /* copy bb into slvec1a */
2692       PetscCall(VecGetArray(mat->slvec1, &ptr));
2693       PetscCall(VecGetArrayRead(bb, &b));
2694       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2695       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2696 
2697       /* set slvec1b = 0 */
2698       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2699       PetscCall(VecZeroEntries(mat->slvec1b));
2700 
2701       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2702       PetscCall(VecRestoreArray(xx, &x));
2703       PetscCall(VecRestoreArrayRead(bb, &b));
2704       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2705 
2706       /* upper triangular part: bb1 = bb1 - B*x */
2707       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2708 
2709       /* local diagonal sweep */
2710       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2711     }
2712     PetscCall(VecDestroy(&bb1));
2713   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2714     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2715   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2716     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2717   } else if (flag & SOR_EISENSTAT) {
2718     Vec                xx1;
2719     PetscBool          hasop;
2720     const PetscScalar *diag;
2721     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2722     PetscInt           i, n;
2723 
2724     if (!mat->xx1) {
2725       PetscCall(VecDuplicate(bb, &mat->xx1));
2726       PetscCall(VecDuplicate(bb, &mat->bb1));
2727     }
2728     xx1 = mat->xx1;
2729     bb1 = mat->bb1;
2730 
2731     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2732 
2733     if (!mat->diag) {
2734       /* this is wrong for same matrix with new nonzero values */
2735       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2736       PetscCall(MatGetDiagonal(matin, mat->diag));
2737     }
2738     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2739 
2740     if (hasop) {
2741       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2742       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2743     } else {
2744       /*
2745           These two lines are replaced by code that may be a bit faster for a good compiler
2746       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2747       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2748       */
2749       PetscCall(VecGetArray(mat->slvec1a, &sl));
2750       PetscCall(VecGetArrayRead(mat->diag, &diag));
2751       PetscCall(VecGetArrayRead(bb, &b));
2752       PetscCall(VecGetArray(xx, &x));
2753       PetscCall(VecGetLocalSize(xx, &n));
2754       if (omega == 1.0) {
2755         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2756         PetscCall(PetscLogFlops(2.0 * n));
2757       } else {
2758         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2759         PetscCall(PetscLogFlops(3.0 * n));
2760       }
2761       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2762       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2763       PetscCall(VecRestoreArrayRead(bb, &b));
2764       PetscCall(VecRestoreArray(xx, &x));
2765     }
2766 
2767     /* multiply off-diagonal portion of matrix */
2768     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2769     PetscCall(VecZeroEntries(mat->slvec1b));
2770     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2771     PetscCall(VecGetArray(mat->slvec0, &from));
2772     PetscCall(VecGetArray(xx, &x));
2773     PetscCall(PetscArraycpy(from, x, bs * mbs));
2774     PetscCall(VecRestoreArray(mat->slvec0, &from));
2775     PetscCall(VecRestoreArray(xx, &x));
2776     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2777     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2778     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2779 
2780     /* local sweep */
2781     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2782     PetscCall(VecAXPY(xx, 1.0, xx1));
2783   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2784   PetscFunctionReturn(PETSC_SUCCESS);
2785 }
2786 
2787 /*@
2788   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows.
2789 
2790   Collective
2791 
2792   Input Parameters:
2793 + comm - MPI communicator
2794 . bs   - the block size, only a block size of 1 is supported
2795 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2796 . n    - This value should be the same as the local size used in creating the
2797          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
2798          calculated if `N` is given) For square matrices `n` is almost always `m`.
2799 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2800 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2801 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2802 . j    - column indices
2803 - a    - matrix values
2804 
2805   Output Parameter:
2806 . mat - the matrix
2807 
2808   Level: intermediate
2809 
2810   Notes:
2811   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2812   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2813   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2814 
2815   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2816 
2817 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2818           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2819 @*/
2820 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2821 {
2822   PetscFunctionBegin;
2823   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2824   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2825   PetscCall(MatCreate(comm, mat));
2826   PetscCall(MatSetSizes(*mat, m, n, M, N));
2827   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2828   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2829   PetscFunctionReturn(PETSC_SUCCESS);
2830 }
2831 
2832 /*@C
2833   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2834 
2835   Collective
2836 
2837   Input Parameters:
2838 + B  - the matrix
2839 . bs - the block size
2840 . i  - the indices into `j` for the start of each local row (indices start with zero)
2841 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2842 - v  - optional values in the matrix, pass `NULL` if not provided
2843 
2844   Level: advanced
2845 
2846   Notes:
2847   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2848   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2849   called this routine.
2850 
2851   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2852   and usually the numerical values as well
2853 
2854   Any entries passed in that are below the diagonal are ignored
2855 
2856 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2857           `MatCreateMPISBAIJWithArrays()`
2858 @*/
2859 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2860 {
2861   PetscFunctionBegin;
2862   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2863   PetscFunctionReturn(PETSC_SUCCESS);
2864 }
2865 
2866 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2867 {
2868   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2869   PetscInt    *indx;
2870   PetscScalar *values;
2871 
2872   PetscFunctionBegin;
2873   PetscCall(MatGetSize(inmat, &m, &N));
2874   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2875     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2876     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2877     PetscInt     *bindx, rmax = a->rmax, j;
2878     PetscMPIInt   rank, size;
2879 
2880     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2881     mbs = m / bs;
2882     Nbs = N / cbs;
2883     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2884     nbs = n / cbs;
2885 
2886     PetscCall(PetscMalloc1(rmax, &bindx));
2887     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2888 
2889     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2890     PetscCallMPI(MPI_Comm_rank(comm, &size));
2891     if (rank == size - 1) {
2892       /* Check sum(nbs) = Nbs */
2893       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2894     }
2895 
2896     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2897     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2898     for (i = 0; i < mbs; i++) {
2899       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2900       nnz = nnz / bs;
2901       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2902       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2903       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2904     }
2905     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2906     PetscCall(PetscFree(bindx));
2907 
2908     PetscCall(MatCreate(comm, outmat));
2909     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2910     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2911     PetscCall(MatSetType(*outmat, MATSBAIJ));
2912     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2913     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2914     MatPreallocateEnd(dnz, onz);
2915   }
2916 
2917   /* numeric phase */
2918   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2919   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2920 
2921   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2922   for (i = 0; i < m; i++) {
2923     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2924     Ii = i + rstart;
2925     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2926     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2927   }
2928   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2929   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2930   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2931   PetscFunctionReturn(PETSC_SUCCESS);
2932 }
2933