xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e64794e4e0b181328b5d79973a03ee824bee75ee)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
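/* Preallocate B from the nonzero patterns of the nm matrices X[] (which must share B's row layout):
   every row of every X[i] is fed into a MATPREALLOCATOR matrix -- and, when symm[i] is set, its
   transpose as well -- after which MatPreallocatorPreallocate() sizes B (also filling it with zeros
   when fill is PETSC_TRUE). */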
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
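/* Generic fallback conversion from MPISBAIJ: build a matrix of type newtype holding the full
   (non-symmetric-storage) operator by inserting every stored upper-triangular entry (r,c) together
   with its transpose (c,r) -- conjugated when the matrix is marked Hermitian -- preallocating via
   MatPreallocateWithMats_Private() unless the target type is dense. */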
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
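/* The two macros below insert or add a single scalar at global position (orow,ocol) into the local
   diagonal (A, SeqSBAIJ) or off-diagonal (B, SeqBAIJ) part. They locate the target block column in
   the block row by a short binary search followed by a linear scan; if the block is absent and new
   nonzeros are allowed, the row is reallocated and later entries shifted up before the value is
   placed at offset bs*cidx+ridx (column-major) inside the bs x bs block. */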
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237   a_noinsert:; \
238     ailen[brow] = nrow; \
239   } while (0)
240 
241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
242   do { \
243     brow = row / bs; \
244     rp   = bj + bi[brow]; \
245     ap   = ba + bs2 * bi[brow]; \
246     rmax = bimax[brow]; \
247     nrow = bilen[brow]; \
248     bcol = col / bs; \
249     ridx = row % bs; \
250     cidx = col % bs; \
251     low  = 0; \
252     high = nrow; \
253     while (high - low > 3) { \
254       t = (low + high) / 2; \
255       if (rp[t] > bcol) high = t; \
256       else low = t; \
257     } \
258     for (_i = low; _i < high; _i++) { \
259       if (rp[_i] > bcol) break; \
260       if (rp[_i] == bcol) { \
261         bap = ap + bs2 * _i + bs * cidx + ridx; \
262         if (addv == ADD_VALUES) *bap += value; \
263         else *bap = value; \
264         goto b_noinsert; \
265       } \
266     } \
267     if (b->nonew == 1) goto b_noinsert; \
268     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
269     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
270     N = nrow++ - 1; \
271     /* shift up all the later entries in this row */ \
272     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
273     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
274     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
275     rp[_i]                          = bcol; \
276     ap[bs2 * _i + bs * cidx + ridx] = value; \
277   b_noinsert:; \
278     bilen[brow] = nrow; \
279   } while (0)
280 
281 /* Only add/insert a(i,j) with i<=j (block indices).
282    Any a(i,j) with i>j supplied by the user is ignored when MAT_IGNORE_LOWER_TRIANGULAR is set, otherwise it generates an error
283 */
284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
285 {
286   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
287   MatScalar     value;
288   PetscBool     roworiented = baij->roworiented;
289   PetscInt      i, j, row, col;
290   PetscInt      rstart_orig = mat->rmap->rstart;
291   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
292   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
293 
294   /* Some Variables required in the macro */
295   Mat           A     = baij->A;
296   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
297   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
298   MatScalar    *aa = a->a;
299 
300   Mat          B     = baij->B;
301   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
302   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
303   MatScalar   *ba = b->a;
304 
305   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
306   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
307   MatScalar *ap, *bap;
308 
309   /* for stash */
310   PetscInt   n_loc, *in_loc = NULL;
311   MatScalar *v_loc = NULL;
312 
313   PetscFunctionBegin;
314   if (!baij->donotstash) {
315     if (n > baij->n_loc) {
316       PetscCall(PetscFree(baij->in_loc));
317       PetscCall(PetscFree(baij->v_loc));
318       PetscCall(PetscMalloc1(n, &baij->in_loc));
319       PetscCall(PetscMalloc1(n, &baij->v_loc));
320 
321       baij->n_loc = n;
322     }
323     in_loc = baij->in_loc;
324     v_loc  = baij->v_loc;
325   }
326 
327   for (i = 0; i < m; i++) {
328     if (im[i] < 0) continue;
329     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
330     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
331       row = im[i] - rstart_orig;                     /* local row index */
332       for (j = 0; j < n; j++) {
333         if (im[i] / bs > in[j] / bs) {
334           PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
335           continue; /* ignore lower triangular blocks */
336         }
337         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
338           col  = in[j] - cstart_orig;                    /* local col index */
339           brow = row / bs;
340           bcol = col / bs;
341           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
342           if (roworiented) value = v[i * n + j];
343           else value = v[i + j * m];
344           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
345           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
346         } else if (in[j] < 0) {
347           continue;
348         } else {
349           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
350           /* off-diag entry (B) */
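          /* colmap maps a global block column to 1 + bs*(local block column in B), with 0 meaning
             "block column not present". A miss (col < 0) on an assembled matrix with new nonzeros
             allowed triggers MatDisAssemble_MPISBAIJ(), after which B is indexed by global columns
             again, so the work pointers below must be refreshed. */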
351           if (mat->was_assembled) {
352             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
353 #if defined(PETSC_USE_CTABLE)
354             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
355             col = col - 1;
356 #else
357             col = baij->colmap[in[j] / bs] - 1;
358 #endif
359             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
360               PetscCall(MatDisAssemble_MPISBAIJ(mat));
361               col = in[j];
362               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
363               B     = baij->B;
364               b     = (Mat_SeqBAIJ *)B->data;
365               bimax = b->imax;
366               bi    = b->i;
367               bilen = b->ilen;
368               bj    = b->j;
369               ba    = b->a;
370             } else col += in[j] % bs;
371           } else col = in[j];
372           if (roworiented) value = v[i * n + j];
373           else value = v[i + j * m];
374           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
375           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
376         }
377       }
378     } else { /* off processor entry */
379       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
380       if (!baij->donotstash) {
381         mat->assembled = PETSC_FALSE;
382         n_loc          = 0;
383         for (j = 0; j < n; j++) {
384           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
385           in_loc[n_loc] = in[j];
386           if (roworiented) {
387             v_loc[n_loc] = v[i * n + j];
388           } else {
389             v_loc[n_loc] = v[j * m + i];
390           }
391           n_loc++;
392         }
393         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
394       }
395     }
396   }
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 
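/* Insert or add one bs x bs block at block position (row,col) of the local SeqSBAIJ matrix A,
   rejecting (or ignoring) blocks below the diagonal. The block column is found by binary search in
   the block row; storage inside each block is column-major, and the roworiented flag only controls
   how the input array v is traversed. */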
400 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
401 {
402   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
403   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
404   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
405   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
406   PetscBool          roworiented = a->roworiented;
407   const PetscScalar *value       = v;
408   MatScalar         *ap, *aa = a->a, *bap;
409 
410   PetscFunctionBegin;
411   if (col < row) {
412     PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
413     PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
414   }
415   rp    = aj + ai[row];
416   ap    = aa + bs2 * ai[row];
417   rmax  = imax[row];
418   nrow  = ailen[row];
419   value = v;
420   low   = 0;
421   high  = nrow;
422 
423   while (high - low > 7) {
424     t = (low + high) / 2;
425     if (rp[t] > col) high = t;
426     else low = t;
427   }
428   for (i = low; i < high; i++) {
429     if (rp[i] > col) break;
430     if (rp[i] == col) {
431       bap = ap + bs2 * i;
432       if (roworiented) {
433         if (is == ADD_VALUES) {
434           for (ii = 0; ii < bs; ii++) {
435             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
436           }
437         } else {
438           for (ii = 0; ii < bs; ii++) {
439             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
440           }
441         }
442       } else {
443         if (is == ADD_VALUES) {
444           for (ii = 0; ii < bs; ii++) {
445             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
446           }
447         } else {
448           for (ii = 0; ii < bs; ii++) {
449             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
450           }
451         }
452       }
453       goto noinsert2;
454     }
455   }
456   if (nonew == 1) goto noinsert2;
457   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
458   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
459   N = nrow++ - 1;
460   high++;
461   /* shift up all the later entries in this row */
462   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
463   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
464   rp[i] = col;
465   bap   = ap + bs2 * i;
466   if (roworiented) {
467     for (ii = 0; ii < bs; ii++) {
468       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
469     }
470   } else {
471     for (ii = 0; ii < bs; ii++) {
472       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
473     }
474   }
475 noinsert2:;
476   ailen[row] = nrow;
477   PetscFunctionReturn(PETSC_SUCCESS);
478 }
479 
480 /*
481    This routine is exactly duplicated in mpibaij.c
482 */
483 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
484 {
485   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
486   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
487   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
488   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
489   PetscBool          roworiented = a->roworiented;
490   const PetscScalar *value       = v;
491   MatScalar         *ap, *aa = a->a, *bap;
492 
493   PetscFunctionBegin;
494   rp    = aj + ai[row];
495   ap    = aa + bs2 * ai[row];
496   rmax  = imax[row];
497   nrow  = ailen[row];
498   low   = 0;
499   high  = nrow;
500   value = v;
501   while (high - low > 7) {
502     t = (low + high) / 2;
503     if (rp[t] > col) high = t;
504     else low = t;
505   }
506   for (i = low; i < high; i++) {
507     if (rp[i] > col) break;
508     if (rp[i] == col) {
509       bap = ap + bs2 * i;
510       if (roworiented) {
511         if (is == ADD_VALUES) {
512           for (ii = 0; ii < bs; ii++) {
513             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
514           }
515         } else {
516           for (ii = 0; ii < bs; ii++) {
517             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
518           }
519         }
520       } else {
521         if (is == ADD_VALUES) {
522           for (ii = 0; ii < bs; ii++, value += bs) {
523             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
524             bap += bs;
525           }
526         } else {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
529             bap += bs;
530           }
531         }
532       }
533       goto noinsert2;
534     }
535   }
536   if (nonew == 1) goto noinsert2;
537   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
538   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
539   N = nrow++ - 1;
540   high++;
541   /* shift up all the later entries in this row */
542   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
543   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
544   rp[i] = col;
545   bap   = ap + bs2 * i;
546   if (roworiented) {
547     for (ii = 0; ii < bs; ii++) {
548       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
549     }
550   } else {
551     for (ii = 0; ii < bs; ii++) {
552       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
553     }
554   }
555 noinsert2:;
556   ailen[row] = nrow;
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 /*
561     This routine could be optimized by removing the need for the block copy below and passing stride information
562   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
563 */
564 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
565 {
566   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
567   const MatScalar *value;
568   MatScalar       *barray      = baij->barray;
569   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
570   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
571   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
572   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
573 
574   PetscFunctionBegin;
575   if (!barray) {
576     PetscCall(PetscMalloc1(bs2, &barray));
577     baij->barray = barray;
578   }
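  /* barray is a scratch buffer that receives one bs x bs block at a time, kept in the caller's
     orientation; stepval is the number of scalars to skip, within v, to move from the end of one
     row (or column) of a block to the start of the next one of the same block. */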
579 
580   if (roworiented) {
581     stepval = (n - 1) * bs;
582   } else {
583     stepval = (m - 1) * bs;
584   }
585   for (i = 0; i < m; i++) {
586     if (im[i] < 0) continue;
587     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
588     if (im[i] >= rstart && im[i] < rend) {
589       row = im[i] - rstart;
590       for (j = 0; j < n; j++) {
591         if (im[i] > in[j]) {
592           PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
593           continue; /* ignore lower triangular blocks */
594         }
595         /* If NumCol = 1 then a copy is not required */
596         if (roworiented && n == 1) {
597           barray = (MatScalar *)v + i * bs2;
598         } else if ((!roworiented) && (m == 1)) {
599           barray = (MatScalar *)v + j * bs2;
600         } else { /* Here a copy is required */
601           if (roworiented) {
602             value = v + i * (stepval + bs) * bs + j * bs;
603           } else {
604             value = v + j * (stepval + bs) * bs + i * bs;
605           }
606           for (ii = 0; ii < bs; ii++, value += stepval) {
607             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
608           }
609           barray -= bs2;
610         }
611 
612         if (in[j] >= cstart && in[j] < cend) {
613           col = in[j] - cstart;
614           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
615         } else if (in[j] < 0) {
616           continue;
617         } else {
618           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
619           if (mat->was_assembled) {
620             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
621 
622 #if defined(PETSC_USE_CTABLE)
623             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
624             col = col < 1 ? -1 : (col - 1) / bs;
625 #else
626             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
627 #endif
628             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
629               PetscCall(MatDisAssemble_MPISBAIJ(mat));
630               col = in[j];
631             }
632           } else col = in[j];
633           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
634         }
635       }
636     } else {
637       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
651 {
652   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
653   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
654   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
655 
656   PetscFunctionBegin;
657   for (i = 0; i < m; i++) {
658     if (idxm[i] < 0) continue; /* negative row */
659     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
660     PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
661     row = idxm[i] - bsrstart;
662     for (j = 0; j < n; j++) {
663       if (idxn[j] < 0) continue; /* negative column */
664       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
665       if (idxn[j] >= bscstart && idxn[j] < bscend) {
666         col = idxn[j] - bscstart;
667         PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
668       } else {
669         if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
670 #if defined(PETSC_USE_CTABLE)
671         PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
672         data--;
673 #else
674         data = baij->colmap[idxn[j] / bs] - 1;
675 #endif
676         if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
677         else {
678           col = data + idxn[j] % bs;
679           PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
680         }
681       }
682     }
683   }
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
688 {
689   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
690   PetscReal     sum[2], *lnorm2;
691 
692   PetscFunctionBegin;
693   if (baij->size == 1) {
694     PetscCall(MatNorm(baij->A, type, norm));
695   } else {
696     if (type == NORM_FROBENIUS) {
697       PetscCall(PetscMalloc1(2, &lnorm2));
698       PetscCall(MatNorm(baij->A, type, lnorm2));
699       *lnorm2 = (*lnorm2) * (*lnorm2);
700       lnorm2++; /* lnorm2[0] now holds the square of norm(A); advance to fill lnorm2[1] */
701       PetscCall(MatNorm(baij->B, type, lnorm2));
702       *lnorm2 = (*lnorm2) * (*lnorm2);
703       lnorm2--; /* lnorm2[1] now holds the square of norm(B); reset the pointer to lnorm2[0] */
704       PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
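      /* The stored B block represents both the strictly upper off-diagonal part and its transpose in
         the full symmetric matrix, hence its squared norm is counted twice below; the SeqSBAIJ norm of
         A is assumed to already account for the symmetry inside the diagonal block. */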
705       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
706       PetscCall(PetscFree(lnorm2));
707     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
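      /* For a symmetric matrix the 1-norm equals the infinity-norm, so only one set of sums is needed:
         each stored entry a(i,j) contributes |a(i,j)| to the sum for column j and, when it also stands
         for a(j,i), to the sum for column i; this lets the loops traverse only the stored upper triangle. */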
708       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
709       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
710       PetscReal    *rsum, vabs;
711       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
712       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
713       MatScalar    *v;
714 
715       PetscCall(PetscMalloc1(mat->cmap->N, &rsum));
716       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
717       /* Amat */
718       v  = amat->a;
719       jj = amat->j;
720       for (brow = 0; brow < mbs; brow++) {
721         grow = bs * (rstart + brow);
722         nz   = amat->i[brow + 1] - amat->i[brow];
723         for (bcol = 0; bcol < nz; bcol++) {
724           gcol = bs * (rstart + *jj);
725           jj++;
726           for (col = 0; col < bs; col++) {
727             for (row = 0; row < bs; row++) {
728               vabs = PetscAbsScalar(*v);
729               v++;
730               rsum[gcol + col] += vabs;
731               /* non-diagonal block */
732               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
733             }
734           }
735         }
736         PetscCall(PetscLogFlops(nz * bs * bs));
737       }
738       /* Bmat */
739       v  = bmat->a;
740       jj = bmat->j;
741       for (brow = 0; brow < mbs; brow++) {
742         grow = bs * (rstart + brow);
743         nz   = bmat->i[brow + 1] - bmat->i[brow];
744         for (bcol = 0; bcol < nz; bcol++) {
745           gcol = bs * garray[*jj];
746           jj++;
747           for (col = 0; col < bs; col++) {
748             for (row = 0; row < bs; row++) {
749               vabs = PetscAbsScalar(*v);
750               v++;
751               rsum[gcol + col] += vabs;
752               rsum[grow + row] += vabs;
753             }
754           }
755         }
756         PetscCall(PetscLogFlops(nz * bs * bs));
757       }
758       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
759       *norm = 0.0;
760       for (col = 0; col < mat->cmap->N; col++) {
761         if (rsum[col] > *norm) *norm = rsum[col];
762       }
763       PetscCall(PetscFree(rsum));
764     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
765   }
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
770 {
771   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
772   PetscInt      nstash, reallocs;
773 
774   PetscFunctionBegin;
775   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
776 
777   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
778   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
779   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
780   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
781   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
782   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
790   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
791   PetscInt     *row, *col;
792   PetscBool     all_assembled;
793   PetscMPIInt   n;
794   PetscBool     r1, r2, r3;
795   MatScalar    *val;
796 
797   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
798   PetscFunctionBegin;
799   if (!baij->donotstash && !mat->nooffprocentries) {
800     while (1) {
801       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
802       if (!flg) break;
803 
804       for (i = 0; i < n;) {
805         /* Now identify the consecutive vals belonging to the same row */
806         for (j = i, rstart = row[j]; j < n; j++) {
807           if (row[j] != rstart) break;
808         }
809         if (j < n) ncols = j - i;
810         else ncols = n - i;
811         /* Now assemble all these values with a single function call */
812         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
813         i = j;
814       }
815     }
816     PetscCall(MatStashScatterEnd_Private(&mat->stash));
817     /* Now process the block-stash. Since the values are stashed column-oriented,
818        temporarily clear the row-oriented flags (i.e. insert column-oriented), and
819        restore the original flags after MatSetValues() */
820     r1 = baij->roworiented;
821     r2 = a->roworiented;
822     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
823 
824     baij->roworiented = PETSC_FALSE;
825     a->roworiented    = PETSC_FALSE;
826 
827     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
828     while (1) {
829       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
830       if (!flg) break;
831 
832       for (i = 0; i < n;) {
833         /* Now identify the consecutive vals belonging to the same row */
834         for (j = i, rstart = row[j]; j < n; j++) {
835           if (row[j] != rstart) break;
836         }
837         if (j < n) ncols = j - i;
838         else ncols = n - i;
839         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
840         i = j;
841       }
842     }
843     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
844 
845     baij->roworiented = r1;
846     a->roworiented    = r2;
847 
848     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
849   }
850 
851   PetscCall(MatAssemblyBegin(baij->A, mode));
852   PetscCall(MatAssemblyEnd(baij->A, mode));
853 
854   /* Determine whether any process has disassembled; if so, we must also
855      disassemble ourselves so that we can reassemble. */
856   /*
857      If the nonzero structure of the submatrix B cannot change, then no process
858      disassembled, so this step can be skipped.
859   */
860   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
861     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
862     if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
863   }
864 
865   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
866   PetscCall(MatAssemblyBegin(baij->B, mode));
867   PetscCall(MatAssemblyEnd(baij->B, mode));
868 
869   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
870 
871   baij->rowvalues = NULL;
872 
873   /* if no new nonzero locations are allowed in the matrix, the parallel nonzero state only needs to be set the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
875     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
876     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
877   }
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
882 #include <petscdraw.h>
883 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
884 {
885   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
886   PetscInt          bs   = mat->rmap->bs;
887   PetscMPIInt       rank = baij->rank;
888   PetscBool         isascii, isdraw;
889   PetscViewer       sviewer;
890   PetscViewerFormat format;
891 
892   PetscFunctionBegin;
893   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
894   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
895   if (isascii) {
896     PetscCall(PetscViewerGetFormat(viewer, &format));
897     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
898       MatInfo info;
899       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
900       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
901       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
902       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
903                                                    mat->rmap->bs, info.memory));
904       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
905       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
906       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
907       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
908       PetscCall(PetscViewerFlush(viewer));
909       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
910       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
911       PetscCall(VecScatterView(baij->Mvctx, viewer));
912       PetscFunctionReturn(PETSC_SUCCESS);
913     } else if (format == PETSC_VIEWER_ASCII_INFO) {
914       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
915       PetscFunctionReturn(PETSC_SUCCESS);
916     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
917       PetscFunctionReturn(PETSC_SUCCESS);
918     }
919   }
920 
921   if (isdraw) {
922     PetscDraw draw;
923     PetscBool isnull;
924     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
925     PetscCall(PetscDrawIsNull(draw, &isnull));
926     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
927   }
928 
929   {
930     /* assemble the entire matrix onto first processor. */
931     Mat           A;
932     Mat_SeqSBAIJ *Aloc;
933     Mat_SeqBAIJ  *Bloc;
934     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
935     MatScalar    *a;
936     const char   *matname;
937 
938     /* Should this be the same type as mat? */
939     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
940     if (rank == 0) {
941       PetscCall(MatSetSizes(A, M, N, M, N));
942     } else {
943       PetscCall(MatSetSizes(A, 0, 0, M, N));
944     }
945     PetscCall(MatSetType(A, MATMPISBAIJ));
946     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
947     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
948 
949     /* copy over the A part */
950     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
951     ai   = Aloc->i;
952     aj   = Aloc->j;
953     a    = Aloc->a;
954     PetscCall(PetscMalloc1(bs, &rvals));
955 
956     for (i = 0; i < mbs; i++) {
957       rvals[0] = bs * (baij->rstartbs + i);
958       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
959       for (j = ai[i]; j < ai[i + 1]; j++) {
960         col = (baij->cstartbs + aj[j]) * bs;
961         for (k = 0; k < bs; k++) {
962           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
963           col++;
964           a += bs;
965         }
966       }
967     }
968     /* copy over the B part */
969     Bloc = (Mat_SeqBAIJ *)baij->B->data;
970     ai   = Bloc->i;
971     aj   = Bloc->j;
972     a    = Bloc->a;
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = baij->garray[aj[j]] * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     PetscCall(PetscFree(rvals));
986     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
987     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
988     /*
989        Everyone has to call to draw the matrix since the graphics waits are
990        synchronized across all processors that share the PetscDraw object
991     */
992     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
993     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
994     if (rank == 0) {
995       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
996       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
997     }
998     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
999     PetscCall(MatDestroy(&A));
1000   }
1001   PetscFunctionReturn(PETSC_SUCCESS);
1002 }
1003 
1004 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1005 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1006 
1007 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1008 {
1009   PetscBool isascii, isdraw, issocket, isbinary;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1013   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1014   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1015   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1016   if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1017   else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1018   PetscFunctionReturn(PETSC_SUCCESS);
1019 }
1020 
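/* MatMult for MPISBAIJ (the complex/Hermitian variant below and the real/transpose variant that
   follows use the same scheme). Only the upper triangle is stored, so y = A*x is assembled in
   pieces, roughly:
     slvec1a = A_d * x                      (local diagonal SBAIJ block)
     slvec0b = B^T * x  (or B^H * x)        (contributions owed to rows owned by other ranks)
     slvec0 = [x ; slvec0b]  --sMvctx, ADD_VALUES-->  slvec1 = [slvec1a + received pieces ; ghost x]
     y = B * slvec1b + slvec1a              (adds the strictly upper off-diagonal part)
   This is a sketch of the data flow as set up in MatSetUpMultiply_MPISBAIJ(). */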
1021 #if defined(PETSC_USE_COMPLEX)
1022 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1023 {
1024   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1025   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1026   PetscScalar       *from;
1027   const PetscScalar *x;
1028 
1029   PetscFunctionBegin;
1030   /* diagonal part */
1031   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1032   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1033   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1034   PetscCall(VecZeroEntries(a->slvec1b));
1035 
1036   /* subdiagonal part */
1037   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1038   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1039 
1040   /* copy x into the vec slvec0 */
1041   PetscCall(VecGetArray(a->slvec0, &from));
1042   PetscCall(VecGetArrayRead(xx, &x));
1043 
1044   PetscCall(PetscArraycpy(from, x, bs * mbs));
1045   PetscCall(VecRestoreArray(a->slvec0, &from));
1046   PetscCall(VecRestoreArrayRead(xx, &x));
1047 
1048   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1049   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1050   /* superdiagonal part */
1051   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 #endif
1055 
1056 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1057 {
1058   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1059   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1060   PetscScalar       *from;
1061   const PetscScalar *x;
1062 
1063   PetscFunctionBegin;
1064   /* diagonal part */
1065   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1066   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1067   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1068   PetscCall(VecZeroEntries(a->slvec1b));
1069 
1070   /* subdiagonal part */
1071   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1072 
1073   /* copy x into the vec slvec0 */
1074   PetscCall(VecGetArray(a->slvec0, &from));
1075   PetscCall(VecGetArrayRead(xx, &x));
1076 
1077   PetscCall(PetscArraycpy(from, x, bs * mbs));
1078   PetscCall(VecRestoreArray(a->slvec0, &from));
1079   PetscCall(VecRestoreArrayRead(xx, &x));
1080 
1081   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1082   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1083   /* superdiagonal part */
1084   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1085   PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087 
1088 #if PetscDefined(USE_COMPLEX)
1089 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1090 {
1091   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1092   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1093   PetscScalar       *from;
1094   const PetscScalar *x;
1095 
1096   PetscFunctionBegin;
1097   /* diagonal part */
1098   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1099   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1100   PetscCall(VecZeroEntries(a->slvec1b));
1101 
1102   /* subdiagonal part */
1103   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1104   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1105 
1106   /* copy x into the vec slvec0 */
1107   PetscCall(VecGetArray(a->slvec0, &from));
1108   PetscCall(VecGetArrayRead(xx, &x));
1109   PetscCall(PetscArraycpy(from, x, bs * mbs));
1110   PetscCall(VecRestoreArray(a->slvec0, &from));
1111 
1112   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1113   PetscCall(VecRestoreArrayRead(xx, &x));
1114   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1115 
1116   /* superdiagonal part */
1117   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1118   PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120 #endif
1121 
1122 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1123 {
1124   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1125   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1126   PetscScalar       *from;
1127   const PetscScalar *x;
1128 
1129   PetscFunctionBegin;
1130   /* diagonal part */
1131   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1132   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1133   PetscCall(VecZeroEntries(a->slvec1b));
1134 
1135   /* subdiagonal part */
1136   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1137 
1138   /* copy x into the vec slvec0 */
1139   PetscCall(VecGetArray(a->slvec0, &from));
1140   PetscCall(VecGetArrayRead(xx, &x));
1141   PetscCall(PetscArraycpy(from, x, bs * mbs));
1142   PetscCall(VecRestoreArray(a->slvec0, &from));
1143 
1144   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1145   PetscCall(VecRestoreArrayRead(xx, &x));
1146   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1147 
1148   /* superdiagonal part */
1149   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 /*
1154   This only works correctly for square matrices where the subblock A->A is the
1155    diagonal block
1156 */
1157 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1158 {
1159   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1160 
1161   PetscFunctionBegin;
1162   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1163   PetscCall(MatGetDiagonal(a->A, v));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1168 {
1169   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(MatScale(a->A, aa));
1173   PetscCall(MatScale(a->B, aa));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1178 {
1179   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1180   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1181   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1182   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1183   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1184 
1185   PetscFunctionBegin;
1186   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1187   mat->getrowactive = PETSC_TRUE;
1188 
1189   if (!mat->rowvalues && (idx || v)) {
1190     /*
1191         allocate enough space to hold information from the longest row.
1192     */
1193     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1194     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1195     PetscInt      max = 1, mbs = mat->mbs, tmp;
1196     for (i = 0; i < mbs; i++) {
1197       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1198       if (max < tmp) max = tmp;
1199     }
1200     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1201   }
1202 
1203   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1204   lrow = row - brstart; /* local row index */
1205 
1206   pvA = &vworkA;
1207   pcA = &cworkA;
1208   pvB = &vworkB;
1209   pcB = &cworkB;
1210   if (!v) {
1211     pvA = NULL;
1212     pvB = NULL;
1213   }
1214   if (!idx) {
1215     pcA = NULL;
1216     if (!v) pcB = NULL;
1217   }
1218   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1219   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1220   nztot = nzA + nzB;
1221 
1222   cmap = mat->garray;
1223   if (v || idx) {
1224     if (nztot) {
1225       /* Sort by increasing column numbers, assuming A and B already sorted */
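      /* cmap (= garray) translates B's local block columns back to global block columns; imark is the
         number of B entries whose global column precedes the diagonal block, so the merged row reads
         [B-left | A | B-right] in increasing global column order. */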
1226       PetscInt imark = -1;
1227       if (v) {
1228         *v = v_p = mat->rowvalues;
1229         for (i = 0; i < nzB; i++) {
1230           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1231           else break;
1232         }
1233         imark = i;
1234         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1235         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1236       }
1237       if (idx) {
1238         *idx = idx_p = mat->rowindices;
1239         if (imark > -1) {
1240           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1241         } else {
1242           for (i = 0; i < nzB; i++) {
1243             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1244             else break;
1245           }
1246           imark = i;
1247         }
1248         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1249         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250       }
1251     } else {
1252       if (idx) *idx = NULL;
1253       if (v) *v = NULL;
1254     }
1255   }
1256   *nz = nztot;
1257   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1258   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1259   PetscFunctionReturn(PETSC_SUCCESS);
1260 }
1261 
1262 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1263 {
1264   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1265 
1266   PetscFunctionBegin;
1267   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1268   baij->getrowactive = PETSC_FALSE;
1269   PetscFunctionReturn(PETSC_SUCCESS);
1270 }
1271 
1272 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1273 {
1274   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1275   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1276 
1277   PetscFunctionBegin;
1278   aA->getrow_utriangular = PETSC_TRUE;
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_FALSE;
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 
1291 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1292 {
1293   PetscFunctionBegin;
1294   if (PetscDefined(USE_COMPLEX)) {
1295     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1296 
1297     PetscCall(MatConjugate(a->A));
1298     PetscCall(MatConjugate(a->B));
1299   }
1300   PetscFunctionReturn(PETSC_SUCCESS);
1301 }
1302 
1303 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1306 
1307   PetscFunctionBegin;
1308   PetscCall(MatRealPart(a->A));
1309   PetscCall(MatRealPart(a->B));
1310   PetscFunctionReturn(PETSC_SUCCESS);
1311 }
1312 
1313 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1316 
1317   PetscFunctionBegin;
1318   PetscCall(MatImaginaryPart(a->A));
1319   PetscCall(MatImaginaryPart(a->B));
1320   PetscFunctionReturn(PETSC_SUCCESS);
1321 }
1322 
1323 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1324    Input: isrow       - distributed(parallel),
1325    Input: isrow       - distributed (parallel),
1326 */
1327 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1328 {
1329   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1330   const PetscInt *ptr1, *ptr2;
1331 
1332   PetscFunctionBegin;
1333   *flg = PETSC_FALSE;
1334   PetscCall(ISGetLocalSize(isrow, &sz1));
1335   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1336   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1337 
1338   PetscCall(ISGetIndices(isrow, &ptr1));
1339   PetscCall(ISGetIndices(iscol_local, &ptr2));
1340 
1341   PetscCall(PetscMalloc1(sz1, &a1));
1342   PetscCall(PetscMalloc1(sz2, &a2));
1343   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1344   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1345   PetscCall(PetscSortInt(sz1, a1));
1346   PetscCall(PetscSortInt(sz2, a2));
1347 
1348   nmatch = 0;
1349   k      = 0;
1350   for (i = 0; i < sz1; i++) {
1351     for (j = k; j < sz2; j++) {
1352       if (a1[i] == a2[j]) {
1353         k = j;
1354         nmatch++;
1355         break;
1356       }
1357     }
1358   }
1359   PetscCall(ISRestoreIndices(isrow, &ptr1));
1360   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1361   PetscCall(PetscFree(a1));
1362   PetscCall(PetscFree(a2));
1363   if (nmatch < sz1) {
1364     *flg = PETSC_FALSE;
1365   } else {
1366     *flg = PETSC_TRUE;
1367   }
1368   PetscFunctionReturn(PETSC_SUCCESS);
1369 }
1370 
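/*
   For the symmetric format only the upper triangular part of the "diagonal" block is stored, so a general
   submatrix A(isrow,iscol) cannot always be extracted directly. If isrow is a subset of iscol, or iscol selects
   all columns, the BAIJ routine below is used as is; otherwise isrow and iscol must not intersect, and the result
   is assembled as A(isrow,iscol) + A(iscol,isrow)^T from two separate extractions.
*/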
1371 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1372 {
1373   Mat       C[2];
1374   IS        iscol_local, isrow_local;
1375   PetscInt  csize, csize_local, rsize;
1376   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1377 
1378   PetscFunctionBegin;
1379   PetscCall(ISGetLocalSize(iscol, &csize));
1380   PetscCall(ISGetLocalSize(isrow, &rsize));
1381   if (call == MAT_REUSE_MATRIX) {
1382     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1383     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1384   } else {
1385     PetscCall(ISAllGather(iscol, &iscol_local));
1386     PetscCall(ISSorted(iscol_local, &issorted));
1387     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1388   }
1389   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1390   if (!isequal) {
1391     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1392     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1393     if (!isidentity) {
1394       if (call == MAT_REUSE_MATRIX) {
1395         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1396         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1397       } else {
1398         PetscCall(ISAllGather(isrow, &isrow_local));
1399         PetscCall(ISSorted(isrow_local, &issorted));
1400         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1401       }
1402     }
1403   }
1404   /* now call MatCreateSubMatrix_MPIBAIJ() */
1405   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1406   if (!isequal && !isidentity) {
1407     if (call == MAT_INITIAL_MATRIX) {
1408       IS       intersect;
1409       PetscInt ni;
1410 
1411       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1412       PetscCall(ISGetLocalSize(intersect, &ni));
1413       PetscCall(ISDestroy(&intersect));
1414       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1415     }
1416     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1417     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1418     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1419     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1420     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1421     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1422     PetscCall(MatDestroy(C));
1423     PetscCall(MatDestroy(C + 1));
1424   }
1425   if (call == MAT_INITIAL_MATRIX) {
1426     if (!isequal && !isidentity) {
1427       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1428       PetscCall(ISDestroy(&isrow_local));
1429     }
1430     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1431     PetscCall(ISDestroy(&iscol_local));
1432   }
1433   PetscFunctionReturn(PETSC_SUCCESS);
1434 }
1435 
1436 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1437 {
1438   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1439 
1440   PetscFunctionBegin;
1441   PetscCall(MatZeroEntries(l->A));
1442   PetscCall(MatZeroEntries(l->B));
1443   PetscFunctionReturn(PETSC_SUCCESS);
1444 }
1445 
1446 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1447 {
1448   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1449   Mat            A = a->A, B = a->B;
1450   PetscLogDouble isend[5], irecv[5];
1451 
1452   PetscFunctionBegin;
1453   info->block_size = (PetscReal)matin->rmap->bs;
1454 
1455   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1456 
1457   isend[0] = info->nz_used;
1458   isend[1] = info->nz_allocated;
1459   isend[2] = info->nz_unneeded;
1460   isend[3] = info->memory;
1461   isend[4] = info->mallocs;
1462 
1463   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1464 
1465   isend[0] += info->nz_used;
1466   isend[1] += info->nz_allocated;
1467   isend[2] += info->nz_unneeded;
1468   isend[3] += info->memory;
1469   isend[4] += info->mallocs;
1470   if (flag == MAT_LOCAL) {
1471     info->nz_used      = isend[0];
1472     info->nz_allocated = isend[1];
1473     info->nz_unneeded  = isend[2];
1474     info->memory       = isend[3];
1475     info->mallocs      = isend[4];
1476   } else if (flag == MAT_GLOBAL_MAX) {
1477     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1478 
1479     info->nz_used      = irecv[0];
1480     info->nz_allocated = irecv[1];
1481     info->nz_unneeded  = irecv[2];
1482     info->memory       = irecv[3];
1483     info->mallocs      = irecv[4];
1484   } else if (flag == MAT_GLOBAL_SUM) {
1485     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1486 
1487     info->nz_used      = irecv[0];
1488     info->nz_allocated = irecv[1];
1489     info->nz_unneeded  = irecv[2];
1490     info->memory       = irecv[3];
1491     info->mallocs      = irecv[4];
1492   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1493   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1494   info->fill_ratio_needed = 0;
1495   info->factor_mallocs    = 0;
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1500 {
1501   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1502   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1503 
1504   PetscFunctionBegin;
1505   switch (op) {
1506   case MAT_NEW_NONZERO_LOCATIONS:
1507   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1508   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1509   case MAT_KEEP_NONZERO_PATTERN:
1510   case MAT_NEW_NONZERO_LOCATION_ERR:
1511     MatCheckPreallocated(A, 1);
1512     PetscCall(MatSetOption(a->A, op, flg));
1513     PetscCall(MatSetOption(a->B, op, flg));
1514     break;
1515   case MAT_ROW_ORIENTED:
1516     MatCheckPreallocated(A, 1);
1517     a->roworiented = flg;
1518 
1519     PetscCall(MatSetOption(a->A, op, flg));
1520     PetscCall(MatSetOption(a->B, op, flg));
1521     break;
1522   case MAT_IGNORE_OFF_PROC_ENTRIES:
1523     a->donotstash = flg;
1524     break;
1525   case MAT_USE_HASH_TABLE:
1526     a->ht_flag = flg;
1527     break;
1528   case MAT_HERMITIAN:
1529     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1530 #if defined(PETSC_USE_COMPLEX)
1531     if (flg) { /* need different mat-vec ops */
1532       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1533       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1534       A->ops->multtranspose    = NULL;
1535       A->ops->multtransposeadd = NULL;
1536       A->symmetric             = PETSC_BOOL3_FALSE;
1537     }
1538 #endif
1539     break;
1540   case MAT_SPD:
1541   case MAT_SYMMETRIC:
1542     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1543 #if defined(PETSC_USE_COMPLEX)
1544     if (flg) { /* restore to use default mat-vec ops */
1545       A->ops->mult             = MatMult_MPISBAIJ;
1546       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1547       A->ops->multtranspose    = MatMult_MPISBAIJ;
1548       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1549     }
1550 #endif
1551     break;
1552   case MAT_STRUCTURALLY_SYMMETRIC:
1553     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1554     break;
1555   case MAT_IGNORE_LOWER_TRIANGULAR:
1556   case MAT_ERROR_LOWER_TRIANGULAR:
1557     aA->ignore_ltriangular = flg;
1558     break;
1559   case MAT_GETROW_UPPERTRIANGULAR:
1560     aA->getrow_utriangular = flg;
1561     break;
1562   default:
1563     break;
1564   }
1565   PetscFunctionReturn(PETSC_SUCCESS);
1566 }
1567 
1568 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1569 {
1570   PetscFunctionBegin;
1571   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1572   if (reuse == MAT_INITIAL_MATRIX) {
1573     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1574   } else if (reuse == MAT_REUSE_MATRIX) {
1575     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1576   }
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1581 {
1582   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1583   Mat           a = baij->A, b = baij->B;
1584   PetscInt      nv, m, n;
1585   PetscBool     flg;
1586 
1587   PetscFunctionBegin;
1588   if (ll != rr) {
1589     PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be the same");
1591   }
1592   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1593 
1594   PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1596 
1597   PetscCall(VecGetLocalSize(rr, &nv));
1598   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1599 
1600   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1601 
1602   /* left diagonalscale the off-diagonal part */
1603   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1604 
1605   /* scale the diagonal part */
1606   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1607 
1608   /* right diagonalscale the off-diagonal part */
1609   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1610   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1611   PetscFunctionReturn(PETSC_SUCCESS);
1612 }
1613 
1614 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1615 {
1616   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1617 
1618   PetscFunctionBegin;
1619   PetscCall(MatSetUnfactored(a->A));
1620   PetscFunctionReturn(PETSC_SUCCESS);
1621 }
1622 
1623 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1624 
1625 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1626 {
1627   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1628   Mat           a, b, c, d;
1629   PetscBool     flg;
1630 
1631   PetscFunctionBegin;
1632   a = matA->A;
1633   b = matA->B;
1634   c = matB->A;
1635   d = matB->B;
1636 
1637   PetscCall(MatEqual(a, c, &flg));
1638   if (flg) PetscCall(MatEqual(b, d, &flg));
1639   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1640   PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642 
1643 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1644 {
1645   PetscBool isbaij;
1646 
1647   PetscFunctionBegin;
1648   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1649   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1650   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1651   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1652     PetscCall(MatGetRowUpperTriangular(A));
1653     PetscCall(MatCopy_Basic(A, B, str));
1654     PetscCall(MatRestoreRowUpperTriangular(A));
1655   } else {
1656     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1657     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1658 
1659     PetscCall(MatCopy(a->A, b->A, str));
1660     PetscCall(MatCopy(a->B, b->B, str));
1661   }
1662   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1663   PetscFunctionReturn(PETSC_SUCCESS);
1664 }
1665 
1666 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1667 {
1668   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1669   PetscBLASInt  bnz, one                          = 1;
1670   Mat_SeqSBAIJ *xa, *ya;
1671   Mat_SeqBAIJ  *xb, *yb;
1672 
1673   PetscFunctionBegin;
1674   if (str == SAME_NONZERO_PATTERN) {
1675     PetscScalar alpha = a;
1676     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1677     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1678     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1679     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1680     xb = (Mat_SeqBAIJ *)xx->B->data;
1681     yb = (Mat_SeqBAIJ *)yy->B->data;
1682     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1683     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1684     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1686     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1687     PetscCall(MatAXPY_Basic(Y, a, X, str));
1688     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1689   } else {
1690     Mat       B;
1691     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1692     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1693     PetscCall(MatGetRowUpperTriangular(X));
1694     PetscCall(MatGetRowUpperTriangular(Y));
1695     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1696     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1697     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1698     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1699     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1700     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1701     PetscCall(MatSetType(B, MATMPISBAIJ));
1702     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1703     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1704     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1705     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1706     PetscCall(MatHeaderMerge(Y, &B));
1707     PetscCall(PetscFree(nnz_d));
1708     PetscCall(PetscFree(nnz_o));
1709     PetscCall(MatRestoreRowUpperTriangular(X));
1710     PetscCall(MatRestoreRowUpperTriangular(Y));
1711   }
1712   PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714 
1715 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1716 {
1717   PetscInt  i;
1718   PetscBool flg;
1719 
1720   PetscFunctionBegin;
1721   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1722   for (i = 0; i < n; i++) {
1723     PetscCall(ISEqual(irow[i], icol[i], &flg));
1724     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1725   }
1726   PetscFunctionReturn(PETSC_SUCCESS);
1727 }
1728 
1729 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1730 {
1731   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1732   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1733 
1734   PetscFunctionBegin;
1735   if (!Y->preallocated) {
1736     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1737   } else if (!aij->nz) {
1738     PetscInt nonew = aij->nonew;
1739     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1740     aij->nonew = nonew;
1741   }
1742   PetscCall(MatShift_Basic(Y, a));
1743   PetscFunctionReturn(PETSC_SUCCESS);
1744 }
1745 
1746 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1747 {
1748   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1749 
1750   PetscFunctionBegin;
1751   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1752   PetscCall(MatMissingDiagonal(a->A, missing, d));
1753   if (d) {
1754     PetscInt rstart;
1755     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1756     *d += rstart / A->rmap->bs;
1757   }
1758   PetscFunctionReturn(PETSC_SUCCESS);
1759 }
1760 
1761 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1762 {
1763   PetscFunctionBegin;
1764   *a = ((Mat_MPISBAIJ *)A->data)->A;
1765   PetscFunctionReturn(PETSC_SUCCESS);
1766 }
1767 
1768 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1769 {
1770   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1771 
1772   PetscFunctionBegin;
1773   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1774   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1779 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1780 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1781 
1782 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1783                                        MatGetRow_MPISBAIJ,
1784                                        MatRestoreRow_MPISBAIJ,
1785                                        MatMult_MPISBAIJ,
1786                                        /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose ones since the matrix is symmetric */
1788                                        MatMultAdd_MPISBAIJ,
1789                                        NULL,
1790                                        NULL,
1791                                        NULL,
1792                                        /* 10*/ NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        MatSOR_MPISBAIJ,
1796                                        MatTranspose_MPISBAIJ,
1797                                        /* 15*/ MatGetInfo_MPISBAIJ,
1798                                        MatEqual_MPISBAIJ,
1799                                        MatGetDiagonal_MPISBAIJ,
1800                                        MatDiagonalScale_MPISBAIJ,
1801                                        MatNorm_MPISBAIJ,
1802                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1803                                        MatAssemblyEnd_MPISBAIJ,
1804                                        MatSetOption_MPISBAIJ,
1805                                        MatZeroEntries_MPISBAIJ,
1806                                        /* 24*/ NULL,
1807                                        NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        /* 29*/ MatSetUp_MPI_Hash,
1812                                        NULL,
1813                                        NULL,
1814                                        MatGetDiagonalBlock_MPISBAIJ,
1815                                        NULL,
1816                                        /* 34*/ MatDuplicate_MPISBAIJ,
1817                                        NULL,
1818                                        NULL,
1819                                        NULL,
1820                                        NULL,
1821                                        /* 39*/ MatAXPY_MPISBAIJ,
1822                                        MatCreateSubMatrices_MPISBAIJ,
1823                                        MatIncreaseOverlap_MPISBAIJ,
1824                                        MatGetValues_MPISBAIJ,
1825                                        MatCopy_MPISBAIJ,
1826                                        /* 44*/ NULL,
1827                                        MatScale_MPISBAIJ,
1828                                        MatShift_MPISBAIJ,
1829                                        NULL,
1830                                        NULL,
1831                                        /* 49*/ NULL,
1832                                        NULL,
1833                                        NULL,
1834                                        NULL,
1835                                        NULL,
1836                                        /* 54*/ NULL,
1837                                        NULL,
1838                                        MatSetUnfactored_MPISBAIJ,
1839                                        NULL,
1840                                        MatSetValuesBlocked_MPISBAIJ,
1841                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1842                                        NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        NULL,
1846                                        /* 64*/ NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        MatGetRowMaxAbs_MPISBAIJ,
1851                                        /* 69*/ NULL,
1852                                        MatConvert_MPISBAIJ_Basic,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        MatLoad_MPISBAIJ,
1861                                        /* 79*/ NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        /* 84*/ NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        /* 89*/ NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        MatConjugate_MPISBAIJ,
1876                                        /* 94*/ NULL,
1877                                        NULL,
1878                                        MatRealPart_MPISBAIJ,
1879                                        MatImaginaryPart_MPISBAIJ,
1880                                        MatGetRowUpperTriangular_MPISBAIJ,
1881                                        /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        /*104*/ MatMissingDiagonal_MPISBAIJ,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        /*109*/ NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        /*114*/ NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        /*119*/ NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        /*124*/ NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        MatSetBlockSizes_Default,
1910                                        NULL,
1911                                        /*129*/ NULL,
1912                                        NULL,
1913                                        MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1914                                        NULL,
1915                                        NULL,
1916                                        /*134*/ NULL,
1917                                        NULL,
1918                                        NULL,
1919                                        MatEliminateZeros_MPISBAIJ,
1920                                        NULL,
1921                                        /*139*/ NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        MatCopyHashToXAIJ_MPI_Hash,
1925                                        NULL};
1926 
1927 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1928 {
1929   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1930   PetscInt      i, mbs, Mbs;
1931   PetscMPIInt   size;
1932 
1933   PetscFunctionBegin;
1934   if (B->hash_active) {
1935     B->ops[0]      = b->cops;
1936     B->hash_active = PETSC_FALSE;
1937   }
1938   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1939   PetscCall(MatSetBlockSize(B, bs));
1940   PetscCall(PetscLayoutSetUp(B->rmap));
1941   PetscCall(PetscLayoutSetUp(B->cmap));
1942   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1943   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1944   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1945 
1946   mbs = B->rmap->n / bs;
1947   Mbs = B->rmap->N / bs;
  PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1949 
1950   B->rmap->bs = bs;
1951   b->bs2      = bs * bs;
1952   b->mbs      = mbs;
1953   b->Mbs      = Mbs;
1954   b->nbs      = B->cmap->n / bs;
1955   b->Nbs      = B->cmap->N / bs;
1956 
1957   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1958   b->rstartbs = B->rmap->rstart / bs;
1959   b->rendbs   = B->rmap->rend / bs;
1960 
1961   b->cstartbs = B->cmap->rstart / bs;
1962   b->cendbs   = B->cmap->rend / bs;
1963 
1964 #if defined(PETSC_USE_CTABLE)
1965   PetscCall(PetscHMapIDestroy(&b->colmap));
1966 #else
1967   PetscCall(PetscFree(b->colmap));
1968 #endif
1969   PetscCall(PetscFree(b->garray));
1970   PetscCall(VecDestroy(&b->lvec));
1971   PetscCall(VecScatterDestroy(&b->Mvctx));
1972   PetscCall(VecDestroy(&b->slvec0));
1973   PetscCall(VecDestroy(&b->slvec0b));
1974   PetscCall(VecDestroy(&b->slvec1));
1975   PetscCall(VecDestroy(&b->slvec1a));
1976   PetscCall(VecDestroy(&b->slvec1b));
1977   PetscCall(VecScatterDestroy(&b->sMvctx));
1978 
1979   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1980 
1981   MatSeqXAIJGetOptions_Private(b->B);
1982   PetscCall(MatDestroy(&b->B));
1983   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1984   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1985   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1986   MatSeqXAIJRestoreOptions_Private(b->B);
1987 
1988   MatSeqXAIJGetOptions_Private(b->A);
1989   PetscCall(MatDestroy(&b->A));
1990   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1991   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1992   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1993   MatSeqXAIJRestoreOptions_Private(b->A);
1994 
1995   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1996   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1997 
1998   B->preallocated  = PETSC_TRUE;
1999   B->was_assembled = PETSC_FALSE;
2000   B->assembled     = PETSC_FALSE;
2001   PetscFunctionReturn(PETSC_SUCCESS);
2002 }
2003 
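/*
   Preallocate from block CSR input (ii, jj, V): for each local block row, entries strictly below the block diagonal
   (bd) are skipped since the symmetric format ignores the lower triangle, the remaining entries are split into
   diagonal-block (d) and off-diagonal-block (o) counts, and the values are then inserted with MatSetValuesBlocked_MPISBAIJ().
*/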
2004 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2005 {
2006   PetscInt        m, rstart, cend;
2007   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2008   const PetscInt *JJ          = NULL;
2009   PetscScalar    *values      = NULL;
2010   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2011   PetscBool       nooffprocentries;
2012 
2013   PetscFunctionBegin;
2014   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2015   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2016   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2017   PetscCall(PetscLayoutSetUp(B->rmap));
2018   PetscCall(PetscLayoutSetUp(B->cmap));
2019   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2020   m      = B->rmap->n / bs;
2021   rstart = B->rmap->rstart / bs;
2022   cend   = B->cmap->rend / bs;
2023 
2024   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2025   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2026   for (i = 0; i < m; i++) {
2027     nz = ii[i + 1] - ii[i];
2028     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2029     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2030     JJ = jj + ii[i];
2031     bd = 0;
2032     for (j = 0; j < nz; j++) {
2033       if (*JJ >= i + rstart) break;
2034       JJ++;
2035       bd++;
2036     }
2037     d = 0;
2038     for (; j < nz; j++) {
2039       if (*JJ++ >= cend) break;
2040       d++;
2041     }
2042     d_nnz[i] = d;
2043     o_nnz[i] = nz - d - bd;
2044     nz       = nz - bd;
2045     nz_max   = PetscMax(nz_max, nz);
2046   }
2047   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2048   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2049   PetscCall(PetscFree2(d_nnz, o_nnz));
2050 
2051   values = (PetscScalar *)V;
2052   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2053   for (i = 0; i < m; i++) {
2054     PetscInt        row   = i + rstart;
2055     PetscInt        ncols = ii[i + 1] - ii[i];
2056     const PetscInt *icols = jj + ii[i];
2057     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2058       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2059       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2060     } else { /* block ordering does not match so we can only insert one block at a time. */
2061       PetscInt j;
2062       for (j = 0; j < ncols; j++) {
2063         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2064         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2065       }
2066     }
2067   }
2068 
2069   if (!V) PetscCall(PetscFree(values));
2070   nooffprocentries    = B->nooffprocentries;
2071   B->nooffprocentries = PETSC_TRUE;
2072   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2073   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2074   B->nooffprocentries = nooffprocentries;
2075 
2076   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 /*MC
2081    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2082    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2083    the matrix is stored.
2084 
   For complex numbers, this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`, `PETSC_TRUE`);
2087 
2088    Options Database Key:
2089 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2090 
2091    Level: beginner
2092 
2093    Note:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
     diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
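
   Example Usage:
   A minimal assembly sketch (sizes, block size, and preallocation values are illustrative only; `PetscCall()` error checking omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL);
   /* set entries in the upper triangular part (including the block diagonal) with MatSetValuesBlocked() */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve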
2096 
2097 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2098 M*/
2099 
2100 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2101 {
2102   Mat_MPISBAIJ *b;
2103   PetscBool     flg = PETSC_FALSE;
2104 
2105   PetscFunctionBegin;
2106   PetscCall(PetscNew(&b));
2107   B->data   = (void *)b;
2108   B->ops[0] = MatOps_Values;
2109 
2110   B->ops->destroy = MatDestroy_MPISBAIJ;
2111   B->ops->view    = MatView_MPISBAIJ;
2112   B->assembled    = PETSC_FALSE;
2113   B->insertmode   = NOT_SET_VALUES;
2114 
2115   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2116   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2117 
2118   /* build local table of row and column ownerships */
2119   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2120 
2121   /* build cache for off array entries formed */
2122   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2123 
2124   b->donotstash  = PETSC_FALSE;
2125   b->colmap      = NULL;
2126   b->garray      = NULL;
2127   b->roworiented = PETSC_TRUE;
2128 
2129   /* stuff used in block assembly */
2130   b->barray = NULL;
2131 
2132   /* stuff used for matrix vector multiply */
2133   b->lvec    = NULL;
2134   b->Mvctx   = NULL;
2135   b->slvec0  = NULL;
2136   b->slvec0b = NULL;
2137   b->slvec1  = NULL;
2138   b->slvec1a = NULL;
2139   b->slvec1b = NULL;
2140   b->sMvctx  = NULL;
2141 
2142   /* stuff for MatGetRow() */
2143   b->rowindices   = NULL;
2144   b->rowvalues    = NULL;
2145   b->getrowactive = PETSC_FALSE;
2146 
2147   /* hash table stuff */
2148   b->ht           = NULL;
2149   b->hd           = NULL;
2150   b->ht_size      = 0;
2151   b->ht_flag      = PETSC_FALSE;
2152   b->ht_fact      = 0;
2153   b->ht_total_ct  = 0;
2154   b->ht_insert_ct = 0;
2155 
2156   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2157   b->ijonly = PETSC_FALSE;
2158 
2159   b->in_loc = NULL;
2160   b->v_loc  = NULL;
2161   b->n_loc  = 0;
2162 
2163   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2164   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2165   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2166   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2167 #if defined(PETSC_HAVE_ELEMENTAL)
2168   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2169 #endif
2170 #if defined(PETSC_HAVE_SCALAPACK)
2171   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2172 #endif
2173   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2174   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2175 
2176   B->symmetric                   = PETSC_BOOL3_TRUE;
2177   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2178   B->symmetry_eternal            = PETSC_TRUE;
2179   B->structural_symmetry_eternal = PETSC_TRUE;
2180 #if defined(PETSC_USE_COMPLEX)
2181   B->hermitian = PETSC_BOOL3_FALSE;
2182 #else
2183   B->hermitian = PETSC_BOOL3_TRUE;
2184 #endif
2185 
2186   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2187   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2188   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2189   if (flg) {
2190     PetscReal fact = 1.39;
2191     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2192     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2193     if (fact <= 1.0) fact = 1.39;
2194     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2195     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2196   }
2197   PetscOptionsEnd();
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2202 /*MC
2203    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2204 
2205    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2206    and `MATMPISBAIJ` otherwise.
2207 
2208    Options Database Key:
2209 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2210 
2211   Level: beginner
2212 
2213 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2214 M*/
2215 
2216 /*@
2217   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2218   the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
2220   performance can be increased by more than a factor of 50.
2221 
2222   Collective
2223 
2224   Input Parameters:
2225 + B     - the matrix
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2228 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2229           submatrix  (same for all local rows)
2230 . d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
2233           for the diagonal entry and set a value even if it is zero.
2234 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2235           submatrix (same for all local rows).
2236 - o_nnz - array containing the number of nonzeros in the various block rows of the
2237           off-diagonal portion of the local submatrix that is right of the diagonal
2238           (possibly different for each block row) or `NULL`.
2239 
2240   Options Database Keys:
2241 + -mat_no_unroll  - uses code that does not unroll the loops in the
2242                     block calculations (much slower)
2243 - -mat_block_size - size of the blocks to use
2244 
2245   Level: intermediate
2246 
2247   Notes:
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2251 
2252   If the *_nnz parameter is given then the *_nz parameter is ignored
2253 
2254   Storage Information:
2255   For a square global matrix we define each processor's diagonal portion
2256   to be its local rows and the corresponding columns (a square submatrix);
2257   each processor's off-diagonal portion encompasses the remainder of the
2258   local matrix (a rectangular submatrix).
2259 
2260   The user can specify preallocated storage for the diagonal part of
2261   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2262   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2263   memory allocation.  Likewise, specify preallocated storage for the
2264   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2265 
  You can call `MatGetInfo()` to get information on how effective the preallocation was,
  for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2268   You can also run with the option `-info` and look for messages with the string
2269   malloc in them to see if additional memory allocation was needed.
2270 
2271   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2272   the figure below we depict these three local rows and all columns (0-11).
2273 
2274 .vb
2275            0 1 2 3 4 5 6 7 8 9 10 11
2276           --------------------------
2277    row 3  |. . . d d d o o o o  o  o
2278    row 4  |. . . d d d o o o o  o  o
2279    row 5  |. . . d d d o o o o  o  o
2280           --------------------------
2281 .ve
2282 
2283   Thus, any entries in the d locations are stored in the d (diagonal)
2284   submatrix, and any entries in the o locations are stored in the
2285   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2286   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2287 
2288   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2289   plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2291 
2292   In general, for PDE problems in which most nonzeros are near the diagonal,
2293   one expects `d_nz` >> `o_nz`.
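
  Continuing the picture above with `bs` = 1 and assuming every depicted d and o location is nonzero, the owning
  process could preallocate its three rows as follows (only the diagonal and upper triangular part of the d block is counted):
.vb
  PetscInt d_nnz[] = {3, 2, 1}; /* row 3: d columns 3,4,5; row 4: columns 4,5; row 5: column 5 */
  PetscInt o_nnz[] = {6, 6, 6}; /* o columns 6-11 in every row */
  MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve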
2294 
2295 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2296 @*/
2297 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2298 {
2299   PetscFunctionBegin;
2300   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2301   PetscValidType(B, 1);
2302   PetscValidLogicalCollectiveInt(B, bs, 2);
2303   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2304   PetscFunctionReturn(PETSC_SUCCESS);
2305 }
2306 
2307 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2308 /*@
2309   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2310   (block compressed row).  For good matrix assembly performance
2311   the user should preallocate the matrix storage by setting the parameters
2312   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2313 
2314   Collective
2315 
2316   Input Parameters:
2317 + comm  - MPI communicator
2318 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2319           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2320 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2321           This value should be the same as the local size used in creating the
2322           y vector for the matrix-vector product y = Ax.
2323 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2324           This value should be the same as the local size used in creating the
2325           x vector for the matrix-vector product y = Ax.
2326 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2327 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2328 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2329           submatrix (same for all local rows)
2330 . d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`.
2333           If you plan to factor the matrix you must leave room for the diagonal entry and
2334           set its value even if it is zero.
2335 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2336           submatrix (same for all local rows).
2337 - o_nnz - array containing the number of nonzeros in the various block rows of the
2338           off-diagonal portion of the local submatrix (possibly different for
2339           each block row) or `NULL`.
2340 
2341   Output Parameter:
2342 . A - the matrix
2343 
2344   Options Database Keys:
2345 + -mat_no_unroll  - uses code that does not unroll the loops in the
2346                     block calculations (much slower)
2347 . -mat_block_size - size of the blocks to use
2348 - -mat_mpi        - use the parallel matrix data structures even on one processor
2349                     (defaults to using SeqBAIJ format on one processor)
2350 
2351   Level: intermediate
2352 
2353   Notes:
2354   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2355   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2356   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2357 
  The number of rows and columns must be divisible by the block size.
  This matrix type does not support complex Hermitian operations.
2360 
2361   The user MUST specify either the local or global matrix dimensions
2362   (possibly both).
2363 
2364   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2366 
  If `m` and `n` are not `PETSC_DECIDE`, then these values determine the `PetscLayout` of the matrix and the ranges returned by
2368   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2369 
2370   If the *_nnz parameter is given then the *_nz parameter is ignored
2371 
2372   Storage Information:
2373   For a square global matrix we define each processor's diagonal portion
2374   to be its local rows and the corresponding columns (a square submatrix);
2375   each processor's off-diagonal portion encompasses the remainder of the
2376   local matrix (a rectangular submatrix).
2377 
2378   The user can specify preallocated storage for the diagonal part of
2379   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2380   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2381   memory allocation. Likewise, specify preallocated storage for the
2382   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2383 
2384   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2385   the figure below we depict these three local rows and all columns (0-11).
2386 
2387 .vb
2388            0 1 2 3 4 5 6 7 8 9 10 11
2389           --------------------------
2390    row 3  |. . . d d d o o o o  o  o
2391    row 4  |. . . d d d o o o o  o  o
2392    row 5  |. . . d d d o o o o  o  o
2393           --------------------------
2394 .ve
2395 
2396   Thus, any entries in the d locations are stored in the d (diagonal)
2397   submatrix, and any entries in the o locations are stored in the
2398   o (off-diagonal) submatrix. Note that the d matrix is stored in
2399   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2400 
2401   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2402   plus the diagonal part of the d matrix,
2403   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2404   In general, for PDE problems in which most nonzeros are near the diagonal,
2405   one expects `d_nz` >> `o_nz`.
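
  Example Usage:
  A direct-call sketch (global size, block size, and preallocation values are illustrative only; `PetscCall()` error checking omitted):
.vb
  Mat A;
  MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A);
  /* set entries in the upper triangular part, assemble with MatAssemblyBegin()/MatAssemblyEnd(), then MatDestroy(&A) */
.ve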
2406 
2407 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2408           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2409 @*/
2410 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2411 {
2412   PetscMPIInt size;
2413 
2414   PetscFunctionBegin;
2415   PetscCall(MatCreate(comm, A));
2416   PetscCall(MatSetSizes(*A, m, n, M, N));
2417   PetscCallMPI(MPI_Comm_size(comm, &size));
2418   if (size > 1) {
2419     PetscCall(MatSetType(*A, MATMPISBAIJ));
2420     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2421   } else {
2422     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2423     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2424   }
2425   PetscFunctionReturn(PETSC_SUCCESS);
2426 }
2427 
2428 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2429 {
2430   Mat           mat;
2431   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2432   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2433   PetscScalar  *array;
2434 
2435   PetscFunctionBegin;
2436   *newmat = NULL;
2437 
2438   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2439   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2440   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2441   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2442   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2443 
2444   if (matin->hash_active) {
2445     PetscCall(MatSetUp(mat));
2446   } else {
2447     mat->factortype   = matin->factortype;
2448     mat->preallocated = PETSC_TRUE;
2449     mat->assembled    = PETSC_TRUE;
2450     mat->insertmode   = NOT_SET_VALUES;
2451 
2452     a      = (Mat_MPISBAIJ *)mat->data;
2453     a->bs2 = oldmat->bs2;
2454     a->mbs = oldmat->mbs;
2455     a->nbs = oldmat->nbs;
2456     a->Mbs = oldmat->Mbs;
2457     a->Nbs = oldmat->Nbs;
2458 
2459     a->size         = oldmat->size;
2460     a->rank         = oldmat->rank;
2461     a->donotstash   = oldmat->donotstash;
2462     a->roworiented  = oldmat->roworiented;
2463     a->rowindices   = NULL;
2464     a->rowvalues    = NULL;
2465     a->getrowactive = PETSC_FALSE;
2466     a->barray       = NULL;
2467     a->rstartbs     = oldmat->rstartbs;
2468     a->rendbs       = oldmat->rendbs;
2469     a->cstartbs     = oldmat->cstartbs;
2470     a->cendbs       = oldmat->cendbs;
2471 
2472     /* hash table stuff */
2473     a->ht           = NULL;
2474     a->hd           = NULL;
2475     a->ht_size      = 0;
2476     a->ht_flag      = oldmat->ht_flag;
2477     a->ht_fact      = oldmat->ht_fact;
2478     a->ht_total_ct  = 0;
2479     a->ht_insert_ct = 0;
2480 
2481     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2482     if (oldmat->colmap) {
2483 #if defined(PETSC_USE_CTABLE)
2484       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2485 #else
2486       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2487       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2488 #endif
2489     } else a->colmap = NULL;
2490 
2491     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2492       PetscCall(PetscMalloc1(len, &a->garray));
2493       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2494     } else a->garray = NULL;
2495 
2496     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2497     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2498     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2499 
2500     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2501     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2502 
2503     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2504     PetscCall(VecGetArray(a->slvec1, &array));
2505     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2506     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2507     PetscCall(VecRestoreArray(a->slvec1, &array));
2508     PetscCall(VecGetArray(a->slvec0, &array));
2509     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2510     PetscCall(VecRestoreArray(a->slvec0, &array));
2511 
    /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead take a reference to the existing scatter (the lazy trick) */
2513     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2514     a->sMvctx = oldmat->sMvctx;
2515 
2516     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2517     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2518   }
2519   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2520   *newmat = mat;
2521   PetscFunctionReturn(PETSC_SUCCESS);
2522 }
2523 
2524 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2525 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2526 
2527 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2528 {
2529   PetscBool isbinary;
2530 
2531   PetscFunctionBegin;
2532   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2533   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2534   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2535   PetscFunctionReturn(PETSC_SUCCESS);
2536 }
2537 
2538 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2539 {
2540   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2541   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2542   PetscReal     atmp;
2543   PetscReal    *work, *svalues, *rvalues;
2544   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2545   PetscMPIInt   rank, size;
2546   PetscInt     *rowners_bs, count, source;
2547   PetscScalar  *va;
2548   MatScalar    *ba;
2549   MPI_Status    stat;
2550 
2551   PetscFunctionBegin;
2552   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2553   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2554   PetscCall(VecGetArray(v, &va));
2555 
2556   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2557   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2558 
2559   bs  = A->rmap->bs;
2560   mbs = a->mbs;
2561   Mbs = a->Mbs;
2562   ba  = b->a;
2563   bi  = b->i;
2564   bj  = b->j;
2565 
2566   /* find ownerships */
2567   rowners_bs = A->rmap->range;
2568 
2569   /* each proc creates an array to be distributed */
2570   PetscCall(PetscCalloc1(bs * Mbs, &work));
2571 
2572   /* row_max for B */
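       /* Every stored entry of B has a global column owned by a higher rank, since only the upper triangle
          is kept: each entry updates the local row max in va[] and, by symmetry, the max of the remote row
          equal to its column, which is accumulated in work[] and later sent to the owning rank. The last
          rank holds no entries in B, hence the rank != size-1 guard. */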
2573   if (rank != size - 1) {
2574     for (i = 0; i < mbs; i++) {
2575       ncols = bi[1] - bi[0];
2576       bi++;
2577       brow = bs * i;
2578       for (j = 0; j < ncols; j++) {
2579         bcol = bs * (*bj);
2580         for (kcol = 0; kcol < bs; kcol++) {
2581           col = bcol + kcol;           /* local col index */
2582           col += rowners_bs[rank + 1]; /* global col index */
2583           for (krow = 0; krow < bs; krow++) {
2584             atmp = PetscAbsScalar(*ba);
2585             ba++;
2586             row = brow + krow; /* local row index */
2587             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2588             if (work[col] < atmp) work[col] = atmp;
2589           }
2590         }
2591         bj++;
2592       }
2593     }
2594 
2595     /* send values to their owners */
2596     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2597       svalues = work + rowners_bs[dest];
2598       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2599       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2600     }
2601   }
2602 
2603   /* receive values */
2604   if (rank) {
2605     rvalues = work;
2606     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2607     for (source = 0; source < rank; source++) {
2608       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2609       /* process values */
2610       for (i = 0; i < count; i++) {
2611         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2612       }
2613     }
2614   }
2615 
2616   PetscCall(VecRestoreArray(v, &va));
2617   PetscCall(PetscFree(work));
2618   PetscFunctionReturn(PETSC_SUCCESS);
2619 }
2620 
2621 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2622 {
2623   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2624   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2625   PetscScalar       *x, *ptr, *from;
2626   Vec                bb1;
2627   const PetscScalar *b;
2628 
2629   PetscFunctionBegin;
2630   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2631   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2632 
2633   if (flag == SOR_APPLY_UPPER) {
2634     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2635     PetscFunctionReturn(PETSC_SUCCESS);
2636   }
2637 
2638   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2639     if (flag & SOR_ZERO_INITIAL_GUESS) {
2640       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2641       its--;
2642     }
2643 
2644     PetscCall(VecDuplicate(bb, &bb1));
2645     while (its--) {
2646       /* lower triangular part: slvec0b = - B^T*xx */
2647       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2648 
2649       /* copy xx into slvec0a */
2650       PetscCall(VecGetArray(mat->slvec0, &ptr));
2651       PetscCall(VecGetArray(xx, &x));
2652       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2653       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2654 
2655       PetscCall(VecScale(mat->slvec0, -1.0));
2656 
2657       /* copy bb into slvec1a */
2658       PetscCall(VecGetArray(mat->slvec1, &ptr));
2659       PetscCall(VecGetArrayRead(bb, &b));
2660       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2661       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2662 
2663       /* set slvec1b = 0 */
2664       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2665       PetscCall(VecZeroEntries(mat->slvec1b));
2666 
2667       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2668       PetscCall(VecRestoreArray(xx, &x));
2669       PetscCall(VecRestoreArrayRead(bb, &b));
2670       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2671 
2672       /* upper triangular part: bb1 = bb1 - B*x */
2673       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2674 
2675       /* local diagonal sweep */
2676       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2677     }
2678     PetscCall(VecDestroy(&bb1));
2679   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2680     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2681   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2682     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2683   } else if (flag & SOR_EISENSTAT) {
2684     Vec                xx1;
2685     PetscBool          hasop;
2686     const PetscScalar *diag;
2687     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2688     PetscInt           i, n;
2689 
2690     if (!mat->xx1) {
2691       PetscCall(VecDuplicate(bb, &mat->xx1));
2692       PetscCall(VecDuplicate(bb, &mat->bb1));
2693     }
2694     xx1 = mat->xx1;
2695     bb1 = mat->bb1;
2696 
2697     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2698 
2699     if (!mat->diag) {
2700       /* this is wrong if the same matrix is reused with new nonzero values */
2701       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2702       PetscCall(MatGetDiagonal(matin, mat->diag));
2703     }
2704     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2705 
2706     if (hasop) {
2707       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2708       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2709     } else {
2710       /*
2711           These two lines are replaced by code that may be a bit faster for a good compiler
2712       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2713       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2714       */
2715       PetscCall(VecGetArray(mat->slvec1a, &sl));
2716       PetscCall(VecGetArrayRead(mat->diag, &diag));
2717       PetscCall(VecGetArrayRead(bb, &b));
2718       PetscCall(VecGetArray(xx, &x));
2719       PetscCall(VecGetLocalSize(xx, &n));
2720       if (omega == 1.0) {
2721         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2722         PetscCall(PetscLogFlops(2.0 * n));
2723       } else {
2724         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2725         PetscCall(PetscLogFlops(3.0 * n));
2726       }
2727       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2728       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2729       PetscCall(VecRestoreArrayRead(bb, &b));
2730       PetscCall(VecRestoreArray(xx, &x));
2731     }
2732 
2733     /* multiply off-diagonal portion of matrix */
2734     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2735     PetscCall(VecZeroEntries(mat->slvec1b));
2736     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2737     PetscCall(VecGetArray(mat->slvec0, &from));
2738     PetscCall(VecGetArray(xx, &x));
2739     PetscCall(PetscArraycpy(from, x, bs * mbs));
2740     PetscCall(VecRestoreArray(mat->slvec0, &from));
2741     PetscCall(VecRestoreArray(xx, &x));
2742     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2743     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2744     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2745 
2746     /* local sweep */
2747     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2748     PetscCall(VecAXPY(xx, 1.0, xx1));
2749   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2750   PetscFunctionReturn(PETSC_SUCCESS);
2751 }
2752 
2753 /*@
2754   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2755 
2756   Collective
2757 
2758   Input Parameters:
2759 + comm - MPI communicator
2760 . bs   - the block size, only a block size of 1 is supported
2761 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2762 . n    - This value should be the same as the local size used in creating the
2763          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have
2764          it calculated if `N` is given). For square matrices `n` is almost always `m`.
2765 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2766 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2767 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2768 . j    - column indices
2769 - a    - matrix values
2770 
2771   Output Parameter:
2772 . mat - the matrix
2773 
2774   Level: intermediate
2775 
2776   Notes:
2777   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2778   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2779   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2780 
2781   The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
2782 
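       Example Usage:
       A minimal sketch, with purely illustrative values and a single MPI process assumed, that builds a 2x2 symmetric matrix with `bs` = 1 from its upper triangle
     .vb
       PetscInt    i[] = {0, 2, 3};         /* row 0 has entries at columns 0 and 1, row 1 at column 1 */
       PetscInt    j[] = {0, 1, 1};
       PetscScalar a[] = {4.0, 1.0, 3.0};
       Mat         A;

       PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
     .ve
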
2783 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2784           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2785 @*/
2786 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2787 {
2788   PetscFunctionBegin;
2789   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2790   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2791   PetscCall(MatCreate(comm, mat));
2792   PetscCall(MatSetSizes(*mat, m, n, M, N));
2793   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2794   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2795   PetscFunctionReturn(PETSC_SUCCESS);
2796 }
2797 
2798 /*@
2799   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2800 
2801   Collective
2802 
2803   Input Parameters:
2804 + B  - the matrix
2805 . bs - the block size
2806 . i  - the indices into `j` for the start of each local row (indices start with zero)
2807 . j  - the column indices for each local row (indices start with zero); these must be sorted for each row
2808 - v  - optional values in the matrix, pass `NULL` if not provided
2809 
2810   Level: advanced
2811 
2812   Notes:
2813   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2814   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2815   called this routine.
2816 
2817   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2818   and usually the numerical values as well
2819 
2820   Any entries passed in that are below the diagonal are ignored
2821 
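       Example Usage:
       A minimal sketch, with purely illustrative values and a single MPI process assumed; it mirrors what `MatCreateMPISBAIJWithArrays()` does internally for a 2x2 matrix with `bs` = 1
     .vb
       PetscInt    i[] = {0, 2, 3};          /* row offsets into j */
       PetscInt    j[] = {0, 1, 1};          /* sorted column indices, upper triangle only */
       PetscScalar v[] = {4.0, 1.0, 3.0};
       Mat         B;

       PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
       PetscCall(MatSetSizes(B, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE));
       PetscCall(MatSetType(B, MATMPISBAIJ));
       PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
     .ve
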
2822 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2823           `MatCreateMPISBAIJWithArrays()`
2824 @*/
2825 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2826 {
2827   PetscFunctionBegin;
2828   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2829   PetscFunctionReturn(PETSC_SUCCESS);
2830 }
2831 
2832 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2833 {
2834   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2835   PetscInt    *indx;
2836   PetscScalar *values;
2837 
2838   PetscFunctionBegin;
2839   PetscCall(MatGetSize(inmat, &m, &N));
2840   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2841     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2842     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2843     PetscInt     *bindx, rmax = a->rmax, j;
2844     PetscMPIInt   rank, size;
2845 
2846     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2847     mbs = m / bs;
2848     Nbs = N / cbs;
2849     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2850     nbs = n / cbs;
2851 
2852     PetscCall(PetscMalloc1(rmax, &bindx));
2853     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2854 
2855     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2856     PetscCallMPI(MPI_Comm_size(comm, &size));
2857     if (rank == size - 1) {
2858       /* Check sum(nbs) = Nbs */
2859       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2860     }
2861 
2862     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2863     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2864     for (i = 0; i < mbs; i++) {
2865       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2866       nnz = nnz / bs;
2867       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2868       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2869       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2870     }
2871     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2872     PetscCall(PetscFree(bindx));
2873 
2874     PetscCall(MatCreate(comm, outmat));
2875     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2876     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2877     PetscCall(MatSetType(*outmat, MATSBAIJ));
2878     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2879     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2880     MatPreallocateEnd(dnz, onz);
2881   }
2882 
2883   /* numeric phase */
2884   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2885   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2886 
2887   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2888   for (i = 0; i < m; i++) {
2889     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2890     Ii = i + rstart;
2891     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2892     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2893   }
2894   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2895   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2896   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2897   PetscFunctionReturn(PETSC_SUCCESS);
2898 }
2899