/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bcd3bd92eda2d5998e2f14c4bbfb33bd936bdc3e) */
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11 #if defined(PETSC_USE_LOG)
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 #endif
14   PetscCall(MatStashDestroy_Private(&mat->stash));
15   PetscCall(MatStashDestroy_Private(&mat->bstash));
16   PetscCall(MatDestroy(&baij->A));
17   PetscCall(MatDestroy(&baij->B));
18 #if defined(PETSC_USE_CTABLE)
19   PetscCall(PetscHMapIDestroy(&baij->colmap));
20 #else
21   PetscCall(PetscFree(baij->colmap));
22 #endif
23   PetscCall(PetscFree(baij->garray));
24   PetscCall(VecDestroy(&baij->lvec));
25   PetscCall(VecScatterDestroy(&baij->Mvctx));
26   PetscCall(VecDestroy(&baij->slvec0));
27   PetscCall(VecDestroy(&baij->slvec0b));
28   PetscCall(VecDestroy(&baij->slvec1));
29   PetscCall(VecDestroy(&baij->slvec1a));
30   PetscCall(VecDestroy(&baij->slvec1b));
31   PetscCall(VecScatterDestroy(&baij->sMvctx));
32   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33   PetscCall(PetscFree(baij->barray));
34   PetscCall(PetscFree(baij->hd));
35   PetscCall(VecDestroy(&baij->diag));
36   PetscCall(VecDestroy(&baij->bb1));
37   PetscCall(VecDestroy(&baij->xx1));
38 #if defined(PETSC_USE_REAL_MAT_SINGLE)
39   PetscCall(PetscFree(baij->setvaluescopy));
40 #endif
41   PetscCall(PetscFree(baij->in_loc));
42   PetscCall(PetscFree(baij->v_loc));
43   PetscCall(PetscFree(baij->rangebs));
44   PetscCall(PetscFree(mat->data));
45 
46   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51 #if defined(PETSC_HAVE_ELEMENTAL)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53 #endif
54 #if defined(PETSC_HAVE_SCALAPACK)
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56 #endif
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63 #define TYPE SBAIJ
64 #define TYPE_SBAIJ
65 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66 #undef TYPE
67 #undef TYPE_SBAIJ
68 
69 #if defined(PETSC_HAVE_ELEMENTAL)
70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71 #endif
72 #if defined(PETSC_HAVE_SCALAPACK)
73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74 #endif
75 
76 /* This could be moved to matimpl.h */
77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78 {
79   Mat       preallocator;
80   PetscInt  r, rstart, rend;
81   PetscInt  bs, i, m, n, M, N;
82   PetscBool cong = PETSC_TRUE;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86   PetscValidLogicalCollectiveInt(B, nm, 2);
87   for (i = 0; i < nm; i++) {
88     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
89     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91   }
92   PetscValidLogicalCollectiveBool(B, fill, 5);
93   PetscCall(MatGetBlockSize(B, &bs));
94   PetscCall(MatGetSize(B, &M, &N));
95   PetscCall(MatGetLocalSize(B, &m, &n));
96   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98   PetscCall(MatSetBlockSize(preallocator, bs));
99   PetscCall(MatSetSizes(preallocator, m, n, M, N));
100   PetscCall(MatSetUp(preallocator));
101   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102   for (r = rstart; r < rend; ++r) {
103     PetscInt           ncols;
104     const PetscInt    *row;
105     const PetscScalar *vals;
106 
107     for (i = 0; i < nm; i++) {
108       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
111       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112     }
113   }
114   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
117   PetscCall(MatDestroy(&preallocator));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
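/*
   Usage sketch (illustrative only; this helper is file-static and the matrix
   names below are hypothetical): preallocate a new matrix C from the nonzero
   pattern of an already assembled matrix A, passing symm = PETSC_TRUE so the
   transposed pattern is inserted as well, since A stores only the upper
   triangle. C must share A's row layout, which the checks above enforce.

     Mat       A, C;              // A assembled elsewhere
     PetscBool symm = PETSC_TRUE; // also insert the transposed pattern
     PetscInt  m, n, M, N;

     PetscCall(MatGetLocalSize(A, &m, &n));
     PetscCall(MatGetSize(A, &M, &N));
     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
     PetscCall(MatSetSizes(C, m, n, M, N));
     PetscCall(MatSetType(C, MATMPIAIJ));
     PetscCall(MatPreallocateWithMats_Private(C, 1, &A, &symm, PETSC_FALSE));
     // C is now preallocated; the fill flag is forwarded to MatPreallocatorPreallocate()
*/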
120 
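/*
   Generic fallback for converting an SBAIJ matrix to another format: since
   only the upper triangle is stored, each retrieved entry (r,c) is inserted
   at (r,c) and mirrored to (c,r); for complex Hermitian matrices the mirrored
   value is conjugated. Dense target types skip the preallocation pass.
*/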
121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122 {
123   Mat      B;
124   PetscInt r;
125 
126   PetscFunctionBegin;
127   if (reuse != MAT_REUSE_MATRIX) {
128     PetscBool symm = PETSC_TRUE, isdense;
129     PetscInt  bs;
130 
131     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133     PetscCall(MatSetType(B, newtype));
134     PetscCall(MatGetBlockSize(A, &bs));
135     PetscCall(MatSetBlockSize(B, bs));
136     PetscCall(PetscLayoutSetUp(B->rmap));
137     PetscCall(PetscLayoutSetUp(B->cmap));
138     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139     if (!isdense) {
140       PetscCall(MatGetRowUpperTriangular(A));
141       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142       PetscCall(MatRestoreRowUpperTriangular(A));
143     } else {
144       PetscCall(MatSetUp(B));
145     }
146   } else {
147     B = *newmat;
148     PetscCall(MatZeroEntries(B));
149   }
150 
151   PetscCall(MatGetRowUpperTriangular(A));
152   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153     PetscInt           ncols;
154     const PetscInt    *row;
155     const PetscScalar *vals;
156 
157     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159 #if defined(PETSC_USE_COMPLEX)
160     if (A->hermitian == PETSC_BOOL3_TRUE) {
161       PetscInt i;
162       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163     } else {
164       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165     }
166 #else
167     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168 #endif
169     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170   }
171   PetscCall(MatRestoreRowUpperTriangular(A));
172   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174 
175   if (reuse == MAT_INPLACE_MATRIX) {
176     PetscCall(MatHeaderReplace(A, &B));
177   } else {
178     *newmat = B;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184 {
185   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186 
187   PetscFunctionBegin;
188   PetscCall(MatStoreValues(aij->A));
189   PetscCall(MatStoreValues(aij->B));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194 {
195   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196 
197   PetscFunctionBegin;
198   PetscCall(MatRetrieveValues(aij->A));
199   PetscCall(MatRetrieveValues(aij->B));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
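/*
   The two macros below insert one scalar into the local diagonal (SBAIJ)
   block A or the off-diagonal (BAIJ) block B of the parallel matrix. Both
   binary-search the block row for the target block column, then either
   update the existing bs x bs block in place or shift the tail of the row
   to open a slot for a new block. Within a block, entry (ridx,cidx) lives
   at offset bs*cidx + ridx, i.e. blocks are stored column-major.
*/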
203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204   do { \
205     brow = row / bs; \
206     rp   = aj + ai[brow]; \
207     ap   = aa + bs2 * ai[brow]; \
208     rmax = aimax[brow]; \
209     nrow = ailen[brow]; \
210     bcol = col / bs; \
211     ridx = row % bs; \
212     cidx = col % bs; \
213     low  = 0; \
214     high = nrow; \
215     while (high - low > 3) { \
216       t = (low + high) / 2; \
217       if (rp[t] > bcol) high = t; \
218       else low = t; \
219     } \
220     for (_i = low; _i < high; _i++) { \
221       if (rp[_i] > bcol) break; \
222       if (rp[_i] == bcol) { \
223         bap = ap + bs2 * _i + bs * cidx + ridx; \
224         if (addv == ADD_VALUES) *bap += value; \
225         else *bap = value; \
226         goto a_noinsert; \
227       } \
228     } \
229     if (a->nonew == 1) goto a_noinsert; \
230     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232     N = nrow++ - 1; \
233     /* shift up all the later entries in this row */ \
234     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237     rp[_i]                          = bcol; \
238     ap[bs2 * _i + bs * cidx + ridx] = value; \
239     A->nonzerostate++; \
240   a_noinsert:; \
241     ailen[brow] = nrow; \
242   } while (0)
243 
244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245   do { \
246     brow = row / bs; \
247     rp   = bj + bi[brow]; \
248     ap   = ba + bs2 * bi[brow]; \
249     rmax = bimax[brow]; \
250     nrow = bilen[brow]; \
251     bcol = col / bs; \
252     ridx = row % bs; \
253     cidx = col % bs; \
254     low  = 0; \
255     high = nrow; \
256     while (high - low > 3) { \
257       t = (low + high) / 2; \
258       if (rp[t] > bcol) high = t; \
259       else low = t; \
260     } \
261     for (_i = low; _i < high; _i++) { \
262       if (rp[_i] > bcol) break; \
263       if (rp[_i] == bcol) { \
264         bap = ap + bs2 * _i + bs * cidx + ridx; \
265         if (addv == ADD_VALUES) *bap += value; \
266         else *bap = value; \
267         goto b_noinsert; \
268       } \
269     } \
270     if (b->nonew == 1) goto b_noinsert; \
271     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273     N = nrow++ - 1; \
274     /* shift up all the later entries in this row */ \
275     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278     rp[_i]                          = bcol; \
279     ap[bs2 * _i + bs * cidx + ridx] = value; \
280     B->nonzerostate++; \
281   b_noinsert:; \
282     bilen[brow] = nrow; \
283   } while (0)
284 
285 /* Only add/insert a(i,j) with i<=j (blocks).
286    Any a(i,j) with i>j input by the user is either ignored or generates an error
287 */
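/* For example, with bs = 2 an entry at global (row,col) = (5,2) lies in block
   (2,1); since 2 > 1 the block is below the diagonal, so the entry is dropped
   when MAT_IGNORE_LOWER_TRIANGULAR is set and generates an error otherwise. */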
288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289 {
290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291   MatScalar     value;
292   PetscBool     roworiented = baij->roworiented;
293   PetscInt      i, j, row, col;
294   PetscInt      rstart_orig = mat->rmap->rstart;
295   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297 
298   /* Some variables required by the MatSetValues_SeqSBAIJ_*_Private() macros */
299   Mat           A     = baij->A;
300   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
301   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302   MatScalar    *aa = a->a;
303 
304   Mat          B     = baij->B;
305   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
306   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307   MatScalar   *ba = b->a;
308 
309   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
310   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311   MatScalar *ap, *bap;
312 
313   /* for stash */
314   PetscInt   n_loc, *in_loc = NULL;
315   MatScalar *v_loc = NULL;
316 
317   PetscFunctionBegin;
318   if (!baij->donotstash) {
319     if (n > baij->n_loc) {
320       PetscCall(PetscFree(baij->in_loc));
321       PetscCall(PetscFree(baij->v_loc));
322       PetscCall(PetscMalloc1(n, &baij->in_loc));
323       PetscCall(PetscMalloc1(n, &baij->v_loc));
324 
325       baij->n_loc = n;
326     }
327     in_loc = baij->in_loc;
328     v_loc  = baij->v_loc;
329   }
330 
331   for (i = 0; i < m; i++) {
332     if (im[i] < 0) continue;
333     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335       row = im[i] - rstart_orig;                     /* local row index */
336       for (j = 0; j < n; j++) {
337         if (im[i] / bs > in[j] / bs) {
338           if (a->ignore_ltriangular) {
339             continue; /* ignore lower triangular blocks */
340           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341         }
342         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343           col  = in[j] - cstart_orig;                    /* local col index */
344           brow = row / bs;
345           bcol = col / bs;
346           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347           if (roworiented) value = v[i * n + j];
348           else value = v[i + j * m];
349           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351         } else if (in[j] < 0) {
352           continue;
353         } else {
354           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355           /* off-diag entry (B) */
356           if (mat->was_assembled) {
357             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358 #if defined(PETSC_USE_CTABLE)
359             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360             col = col - 1;
361 #else
362             col = baij->colmap[in[j] / bs] - 1;
363 #endif
364             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
365               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366               col = in[j];
367               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
368               B     = baij->B;
369               b     = (Mat_SeqBAIJ *)(B)->data;
370               bimax = b->imax;
371               bi    = b->i;
372               bilen = b->ilen;
373               bj    = b->j;
374               ba    = b->a;
375             } else col += in[j] % bs;
376           } else col = in[j];
377           if (roworiented) value = v[i * n + j];
378           else value = v[i + j * m];
379           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381         }
382       }
383     } else { /* off processor entry */
384       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385       if (!baij->donotstash) {
386         mat->assembled = PETSC_FALSE;
387         n_loc          = 0;
388         for (j = 0; j < n; j++) {
389           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390           in_loc[n_loc] = in[j];
391           if (roworiented) {
392             v_loc[n_loc] = v[i * n + j];
393           } else {
394             v_loc[n_loc] = v[j * m + i];
395           }
396           n_loc++;
397         }
398         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399       }
400     }
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406 {
407   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
408   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411   PetscBool          roworiented = a->roworiented;
412   const PetscScalar *value       = v;
413   MatScalar         *ap, *aa = a->a, *bap;
414 
415   PetscFunctionBegin;
416   if (col < row) {
417     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419   }
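  /* Binary-search the block row for block column 'col'; blocks are stored
     column-major, so row-oriented input is transposed on the fly below by
     striding through bap with step bs */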
420   rp    = aj + ai[row];
421   ap    = aa + bs2 * ai[row];
422   rmax  = imax[row];
423   nrow  = ailen[row];
424   value = v;
425   low   = 0;
426   high  = nrow;
427 
428   while (high - low > 7) {
429     t = (low + high) / 2;
430     if (rp[t] > col) high = t;
431     else low = t;
432   }
433   for (i = low; i < high; i++) {
434     if (rp[i] > col) break;
435     if (rp[i] == col) {
436       bap = ap + bs2 * i;
437       if (roworiented) {
438         if (is == ADD_VALUES) {
439           for (ii = 0; ii < bs; ii++) {
440             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441           }
442         } else {
443           for (ii = 0; ii < bs; ii++) {
444             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445           }
446         }
447       } else {
448         if (is == ADD_VALUES) {
449           for (ii = 0; ii < bs; ii++) {
450             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451           }
452         } else {
453           for (ii = 0; ii < bs; ii++) {
454             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455           }
456         }
457       }
458       goto noinsert2;
459     }
460   }
461   if (nonew == 1) goto noinsert2;
462   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464   N = nrow++ - 1;
465   high++;
466   /* shift up all the later entries in this row */
467   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469   rp[i] = col;
470   bap   = ap + bs2 * i;
471   if (roworiented) {
472     for (ii = 0; ii < bs; ii++) {
473       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474     }
475   } else {
476     for (ii = 0; ii < bs; ii++) {
477       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478     }
479   }
480 noinsert2:;
481   ailen[row] = nrow;
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 /*
486    This routine is exactly duplicated in mpibaij.c
487 */
488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
491   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494   PetscBool          roworiented = a->roworiented;
495   const PetscScalar *value       = v;
496   MatScalar         *ap, *aa = a->a, *bap;
497 
498   PetscFunctionBegin;
499   rp    = aj + ai[row];
500   ap    = aa + bs2 * ai[row];
501   rmax  = imax[row];
502   nrow  = ailen[row];
503   low   = 0;
504   high  = nrow;
505   value = v;
506   while (high - low > 7) {
507     t = (low + high) / 2;
508     if (rp[t] > col) high = t;
509     else low = t;
510   }
511   for (i = low; i < high; i++) {
512     if (rp[i] > col) break;
513     if (rp[i] == col) {
514       bap = ap + bs2 * i;
515       if (roworiented) {
516         if (is == ADD_VALUES) {
517           for (ii = 0; ii < bs; ii++) {
518             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519           }
520         } else {
521           for (ii = 0; ii < bs; ii++) {
522             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523           }
524         }
525       } else {
526         if (is == ADD_VALUES) {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529             bap += bs;
530           }
531         } else {
532           for (ii = 0; ii < bs; ii++, value += bs) {
533             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534             bap += bs;
535           }
536         }
537       }
538       goto noinsert2;
539     }
540   }
541   if (nonew == 1) goto noinsert2;
542   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544   N = nrow++ - 1;
545   high++;
546   /* shift up all the later entries in this row */
547   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549   rp[i] = col;
550   bap   = ap + bs2 * i;
551   if (roworiented) {
552     for (ii = 0; ii < bs; ii++) {
553       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554     }
555   } else {
556     for (ii = 0; ii < bs; ii++) {
557       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558     }
559   }
560 noinsert2:;
561   ailen[row] = nrow;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 /*
566     This routine could be optimized by removing the need for the block copy below and passing stride information
567   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568 */
569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570 {
571   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572   const MatScalar *value;
573   MatScalar       *barray      = baij->barray;
574   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
578 
579   PetscFunctionBegin;
580   if (!barray) {
581     PetscCall(PetscMalloc1(bs2, &barray));
582     baij->barray = barray;
583   }
584 
585   if (roworiented) {
586     stepval = (n - 1) * bs;
587   } else {
588     stepval = (m - 1) * bs;
589   }
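  /* v holds m x n blocks of size bs x bs. With row-oriented input, consecutive
     rows of one block are n*bs scalars apart in v, so after copying the bs
     entries of a block row the value pointer advances by stepval = (n-1)*bs;
     column-oriented input uses (m-1)*bs analogously. */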
590   for (i = 0; i < m; i++) {
591     if (im[i] < 0) continue;
592     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593     if (im[i] >= rstart && im[i] < rend) {
594       row = im[i] - rstart;
595       for (j = 0; j < n; j++) {
596         if (im[i] > in[j]) {
597           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599         }
600         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the input block can be used in place and no copy is required */
601         if ((roworiented) && (n == 1)) {
602           barray = (MatScalar *)v + i * bs2;
603         } else if ((!roworiented) && (m == 1)) {
604           barray = (MatScalar *)v + j * bs2;
605         } else { /* Here a copy is required */
606           if (roworiented) {
607             value = v + i * (stepval + bs) * bs + j * bs;
608           } else {
609             value = v + j * (stepval + bs) * bs + i * bs;
610           }
611           for (ii = 0; ii < bs; ii++, value += stepval) {
612             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613           }
614           barray -= bs2;
615         }
616 
617         if (in[j] >= cstart && in[j] < cend) {
618           col = in[j] - cstart;
619           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620         } else if (in[j] < 0) {
621           continue;
622         } else {
623           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624           if (mat->was_assembled) {
625             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
626 
627 #if defined(PETSC_USE_DEBUG)
628   #if defined(PETSC_USE_CTABLE)
629             {
630               PetscInt data;
631               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633             }
634   #else
635             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636   #endif
637 #endif
638 #if defined(PETSC_USE_CTABLE)
639             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640             col = (col - 1) / bs;
641 #else
642             col = (baij->colmap[in[j]] - 1) / bs;
643 #endif
644             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645               PetscCall(MatDisAssemble_MPISBAIJ(mat));
646               col = in[j];
647             }
648           } else col = in[j];
649           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650         }
651       }
652     } else {
653       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654       if (!baij->donotstash) {
655         if (roworiented) {
656           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         } else {
658           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659         }
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667 {
668   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671 
672   PetscFunctionBegin;
673   for (i = 0; i < m; i++) {
674     if (idxm[i] < 0) continue; /* negative row */
675     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677       row = idxm[i] - bsrstart;
678       for (j = 0; j < n; j++) {
679         if (idxn[j] < 0) continue; /* negative column */
680         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682           col = idxn[j] - bscstart;
683           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684         } else {
685           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686 #if defined(PETSC_USE_CTABLE)
687           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688           data--;
689 #else
690           data = baij->colmap[idxn[j] / bs] - 1;
691 #endif
692           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693           else {
694             col = data + idxn[j] % bs;
695             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696           }
697         }
698       }
699     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705 {
706   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707   PetscReal     sum[2], *lnorm2;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, norm));
712   } else {
713     if (type == NORM_FROBENIUS) {
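      /* With symmetric storage each off-diagonal block of B represents two
         entries of the full matrix, so ||mat||_F^2 = sum over ranks of
         ||A||_F^2 + 2*||B||_F^2, evaluated below as sqrt(sum[0] + 2*sum[1]) */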
714       PetscCall(PetscMalloc1(2, &lnorm2));
715       PetscCall(MatNorm(baij->A, type, lnorm2));
716       *lnorm2 = (*lnorm2) * (*lnorm2);
717       lnorm2++; /* square of norm(A) */
718       PetscCall(MatNorm(baij->B, type, lnorm2));
719       *lnorm2 = (*lnorm2) * (*lnorm2);
720       lnorm2--; /* square of norm(B) */
721       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
723       PetscCall(PetscFree(lnorm2));
724     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
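      /* For a symmetric matrix the 1-norm equals the infinity-norm; both reduce
         to the maximum column sum. Each stored off-diagonal entry contributes to
         two column sums: its own column and, by symmetry, the column indexed by
         its row. Diagonal blocks are stored in full and need no mirroring. */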
725       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
727       PetscReal    *rsum, *rsum2, vabs;
728       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730       MatScalar    *v;
731 
732       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734       /* Amat */
735       v  = amat->a;
736       jj = amat->j;
737       for (brow = 0; brow < mbs; brow++) {
738         grow = bs * (rstart + brow);
739         nz   = amat->i[brow + 1] - amat->i[brow];
740         for (bcol = 0; bcol < nz; bcol++) {
741           gcol = bs * (rstart + *jj);
742           jj++;
743           for (col = 0; col < bs; col++) {
744             for (row = 0; row < bs; row++) {
745               vabs = PetscAbsScalar(*v);
746               v++;
747               rsum[gcol + col] += vabs;
748               /* non-diagonal block */
749               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
750             }
751           }
752         }
753         PetscCall(PetscLogFlops(nz * bs * bs));
754       }
755       /* Bmat */
756       v  = bmat->a;
757       jj = bmat->j;
758       for (brow = 0; brow < mbs; brow++) {
759         grow = bs * (rstart + brow);
760         nz   = bmat->i[brow + 1] - bmat->i[brow];
761         for (bcol = 0; bcol < nz; bcol++) {
762           gcol = bs * garray[*jj];
763           jj++;
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               vabs = PetscAbsScalar(*v);
767               v++;
768               rsum[gcol + col] += vabs;
769               rsum[grow + row] += vabs;
770             }
771           }
772         }
773         PetscCall(PetscLogFlops(nz * bs * bs));
774       }
775       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776       *norm = 0.0;
777       for (col = 0; col < mat->cmap->N; col++) {
778         if (rsum2[col] > *norm) *norm = rsum2[col];
779       }
780       PetscCall(PetscFree2(rsum, rsum2));
781     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782   }
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   PetscInt      nstash, reallocs;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793 
794   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
799   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800   PetscFunctionReturn(PETSC_SUCCESS);
801 }
802 
803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804 {
805   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
807   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808   PetscInt     *row, *col;
809   PetscBool     other_disassembled;
810   PetscMPIInt   n;
811   PetscBool     r1, r2, r3;
812   MatScalar    *val;
813 
814   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815   PetscFunctionBegin;
816   if (!baij->donotstash && !mat->nooffprocentries) {
817     while (1) {
818       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819       if (!flg) break;
820 
821       for (i = 0; i < n;) {
822         /* Now identify the consecutive vals belonging to the same row */
823         for (j = i, rstart = row[j]; j < n; j++) {
824           if (row[j] != rstart) break;
825         }
826         if (j < n) ncols = j - i;
827         else ncols = n - i;
828         /* Now assemble all these values with a single function call */
829         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830         i = j;
831       }
832     }
833     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834     /* Now process the block-stash. Since the values are stashed column-oriented,
835        temporarily clear the roworiented flags, and restore the original flags
836        after the MatSetValuesBlocked() calls */
837     r1 = baij->roworiented;
838     r2 = a->roworiented;
839     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
840 
841     baij->roworiented = PETSC_FALSE;
842     a->roworiented    = PETSC_FALSE;
843 
844     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
845     while (1) {
846       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847       if (!flg) break;
848 
849       for (i = 0; i < n;) {
850         /* Now identify the consecutive vals belonging to the same row */
851         for (j = i, rstart = row[j]; j < n; j++) {
852           if (row[j] != rstart) break;
853         }
854         if (j < n) ncols = j - i;
855         else ncols = n - i;
856         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857         i = j;
858       }
859     }
860     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
861 
862     baij->roworiented = r1;
863     a->roworiented    = r2;
864 
865     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
866   }
867 
868   PetscCall(MatAssemblyBegin(baij->A, mode));
869   PetscCall(MatAssemblyEnd(baij->A, mode));
870 
871   /* determine if any processor has disassembled, if so we must
872      also disassemble ourselves, in order that we may reassemble. */
873   /*
874      if the nonzero structure of the submatrix B cannot change, then no processor
875      can have disassembled, so this check can be skipped
876   */
877   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
878     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
879     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880   }
881 
882   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
883   PetscCall(MatAssemblyBegin(baij->B, mode));
884   PetscCall(MatAssemblyEnd(baij->B, mode));
885 
886   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
887 
888   baij->rowvalues = NULL;
889 
890   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
891   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
893     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899 #include <petscdraw.h>
900 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901 {
902   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903   PetscInt          bs   = mat->rmap->bs;
904   PetscMPIInt       rank = baij->rank;
905   PetscBool         iascii, isdraw;
906   PetscViewer       sviewer;
907   PetscViewerFormat format;
908 
909   PetscFunctionBegin;
910   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
911   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
912   if (iascii) {
913     PetscCall(PetscViewerGetFormat(viewer, &format));
914     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915       MatInfo info;
916       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
917       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
918       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
919       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
920                                                    mat->rmap->bs, (double)info.memory));
921       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
924       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
925       PetscCall(PetscViewerFlush(viewer));
926       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
927       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
928       PetscCall(VecScatterView(baij->Mvctx, viewer));
929       PetscFunctionReturn(PETSC_SUCCESS);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
932       PetscFunctionReturn(PETSC_SUCCESS);
933     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
934       PetscFunctionReturn(PETSC_SUCCESS);
935     }
936   }
937 
938   if (isdraw) {
939     PetscDraw draw;
940     PetscBool isnull;
941     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
942     PetscCall(PetscDrawIsNull(draw, &isnull));
943     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944   }
945 
946   {
947     /* assemble the entire matrix onto first processor. */
948     Mat           A;
949     Mat_SeqSBAIJ *Aloc;
950     Mat_SeqBAIJ  *Bloc;
951     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952     MatScalar    *a;
953     const char   *matname;
954 
955     /* Should this be the same type as mat? */
956     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957     if (rank == 0) {
958       PetscCall(MatSetSizes(A, M, N, M, N));
959     } else {
960       PetscCall(MatSetSizes(A, 0, 0, M, N));
961     }
962     PetscCall(MatSetType(A, MATMPISBAIJ));
963     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
964     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
968     ai   = Aloc->i;
969     aj   = Aloc->j;
970     a    = Aloc->a;
971     PetscCall(PetscMalloc1(bs, &rvals));
972 
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = (baij->cstartbs + aj[j]) * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     /* copy over the B part */
986     Bloc = (Mat_SeqBAIJ *)baij->B->data;
987     ai   = Bloc->i;
988     aj   = Bloc->j;
989     a    = Bloc->a;
990     for (i = 0; i < mbs; i++) {
991       rvals[0] = bs * (baij->rstartbs + i);
992       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993       for (j = ai[i]; j < ai[i + 1]; j++) {
994         col = baij->garray[aj[j]] * bs;
995         for (k = 0; k < bs; k++) {
996           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
997           col++;
998           a += bs;
999         }
1000       }
1001     }
1002     PetscCall(PetscFree(rvals));
1003     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1004     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005     /*
1006        Everyone has to call to draw the matrix since the graphics waits are
1007        synchronized across all processors that share the PetscDraw object
1008     */
1009     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1010     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011     if (rank == 0) {
1012       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1013       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014     }
1015     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1016     PetscCall(PetscViewerFlush(viewer));
1017     PetscCall(MatDestroy(&A));
1018   }
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024 
1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026 {
1027   PetscBool iascii, isdraw, issocket, isbinary;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034   if (iascii || isdraw || issocket) {
1035     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
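/*
   Symmetric parallel matrix-vector product: only the diagonal block A and the
   upper off-diagonal block B are stored, so y = mat*x needs B*x_ghost as well
   as B^T*x (B^H*x in the Hermitian variant) sent back to the owners of the
   ghost columns. Both transfers are performed by a single scatter (sMvctx) on
   the stacked work vectors slvec0 = [x_local; B^T*x_local] and
   slvec1 = [A*x_local; x_ghost]: the scatter adds the B^T contributions into
   slvec1a while gathering the ghost values of x into slvec1b, and the final
   result is y = B*slvec1b + slvec1a.
*/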
1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041 {
1042   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1044   PetscScalar       *from;
1045   const PetscScalar *x;
1046 
1047   PetscFunctionBegin;
1048   /* diagonal part */
1049   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1051   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1052   PetscCall(VecZeroEntries(a->slvec1b));
1053 
1054   /* subdiagonal part */
1055   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1056   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1057 
1058   /* copy x into the vec slvec0 */
1059   PetscCall(VecGetArray(a->slvec0, &from));
1060   PetscCall(VecGetArrayRead(xx, &x));
1061 
1062   PetscCall(PetscArraycpy(from, x, bs * mbs));
1063   PetscCall(VecRestoreArray(a->slvec0, &from));
1064   PetscCall(VecRestoreArrayRead(xx, &x));
1065 
1066   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1067   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1068   /* superdiagonal part */
1069   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1070   PetscFunctionReturn(PETSC_SUCCESS);
1071 }
1072 
1073 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1074 {
1075   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1076   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1077   PetscScalar       *from;
1078   const PetscScalar *x;
1079 
1080   PetscFunctionBegin;
1081   /* diagonal part */
1082   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1083   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1084   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1085   PetscCall(VecZeroEntries(a->slvec1b));
1086 
1087   /* subdiagonal part */
1088   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1089 
1090   /* copy x into the vec slvec0 */
1091   PetscCall(VecGetArray(a->slvec0, &from));
1092   PetscCall(VecGetArrayRead(xx, &x));
1093 
1094   PetscCall(PetscArraycpy(from, x, bs * mbs));
1095   PetscCall(VecRestoreArray(a->slvec0, &from));
1096   PetscCall(VecRestoreArrayRead(xx, &x));
1097 
1098   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1099   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1100   /* superdiagonal part */
1101   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1106 {
1107   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1108   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1109   PetscScalar       *from;
1110   const PetscScalar *x;
1111 
1112   PetscFunctionBegin;
1113   /* diagonal part */
1114   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1115   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1116   PetscCall(VecZeroEntries(a->slvec1b));
1117 
1118   /* subdiagonal part */
1119   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1120   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1121 
1122   /* copy x into the vec slvec0 */
1123   PetscCall(VecGetArray(a->slvec0, &from));
1124   PetscCall(VecGetArrayRead(xx, &x));
1125   PetscCall(PetscArraycpy(from, x, bs * mbs));
1126   PetscCall(VecRestoreArray(a->slvec0, &from));
1127 
1128   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1129   PetscCall(VecRestoreArrayRead(xx, &x));
1130   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1131 
1132   /* superdiagonal part */
1133   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1140   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1141   PetscScalar       *from;
1142   const PetscScalar *x;
1143 
1144   PetscFunctionBegin;
1145   /* diagonal part */
1146   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1147   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1148   PetscCall(VecZeroEntries(a->slvec1b));
1149 
1150   /* subdiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1152 
1153   /* copy x into the vec slvec0 */
1154   PetscCall(VecGetArray(a->slvec0, &from));
1155   PetscCall(VecGetArrayRead(xx, &x));
1156   PetscCall(PetscArraycpy(from, x, bs * mbs));
1157   PetscCall(VecRestoreArray(a->slvec0, &from));
1158 
1159   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1160   PetscCall(VecRestoreArrayRead(xx, &x));
1161   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1162 
1163   /* superdiagonal part */
1164   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 /*
1169   This only works correctly for square matrices where the subblock A->A is the
1170    diagonal block
1171 */
1172 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1173 {
1174   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1178   PetscCall(MatGetDiagonal(a->A, v));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1183 {
1184   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatScale(a->A, aa));
1188   PetscCall(MatScale(a->B, aa));
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
1192 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1193 {
1194   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1195   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1196   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1197   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1198   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1199 
1200   PetscFunctionBegin;
1201   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1202   mat->getrowactive = PETSC_TRUE;
1203 
1204   if (!mat->rowvalues && (idx || v)) {
1205     /*
1206         allocate enough space to hold information from the longest row.
1207     */
1208     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1209     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1210     PetscInt      max = 1, mbs = mat->mbs, tmp;
1211     for (i = 0; i < mbs; i++) {
1212       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1213       if (max < tmp) max = tmp;
1214     }
1215     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1216   }
1217 
1218   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1219   lrow = row - brstart; /* local row index */
1220 
1221   pvA = &vworkA;
1222   pcA = &cworkA;
1223   pvB = &vworkB;
1224   pcB = &cworkB;
1225   if (!v) {
1226     pvA = NULL;
1227     pvB = NULL;
1228   }
1229   if (!idx) {
1230     pcA = NULL;
1231     if (!v) pcB = NULL;
1232   }
1233   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1234   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1235   nztot = nzA + nzB;
1236 
1237   cmap = mat->garray;
1238   if (v || idx) {
1239     if (nztot) {
1240       /* Sort by increasing column numbers, assuming A and B already sorted */
1241       PetscInt imark = -1;
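        /* imark counts the off-diagonal (B) columns that lie before this
           process's diagonal block, giving the merge order B-left, A, B-right */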
1242       if (v) {
1243         *v = v_p = mat->rowvalues;
1244         for (i = 0; i < nzB; i++) {
1245           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1246           else break;
1247         }
1248         imark = i;
1249         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1250         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1251       }
1252       if (idx) {
1253         *idx = idx_p = mat->rowindices;
1254         if (imark > -1) {
1255           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1256         } else {
1257           for (i = 0; i < nzB; i++) {
1258             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259             else break;
1260           }
1261           imark = i;
1262         }
1263         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1264         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1265       }
1266     } else {
1267       if (idx) *idx = NULL;
1268       if (v) *v = NULL;
1269     }
1270   }
1271   *nz = nztot;
1272   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1273   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1278 {
1279   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1280 
1281   PetscFunctionBegin;
1282   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1283   baij->getrowactive = PETSC_FALSE;
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 
1287 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1288 {
1289   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1290   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1291 
1292   PetscFunctionBegin;
1293   aA->getrow_utriangular = PETSC_TRUE;
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1297 {
1298   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1299   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1300 
1301   PetscFunctionBegin;
1302   aA->getrow_utriangular = PETSC_FALSE;
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1307 {
1308   PetscFunctionBegin;
1309   if (PetscDefined(USE_COMPLEX)) {
1310     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1311 
1312     PetscCall(MatConjugate(a->A));
1313     PetscCall(MatConjugate(a->B));
1314   }
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1319 {
1320   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatRealPart(a->A));
1324   PetscCall(MatRealPart(a->B));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1329 {
1330   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1331 
1332   PetscFunctionBegin;
1333   PetscCall(MatImaginaryPart(a->A));
1334   PetscCall(MatImaginaryPart(a->B));
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1339    Input: isrow       - distributed (parallel),
1340           iscol_local - locally owned (seq)
1341 */
1342 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1343 {
1344   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1345   const PetscInt *ptr1, *ptr2;
1346 
1347   PetscFunctionBegin;
1348   PetscCall(ISGetLocalSize(isrow, &sz1));
1349   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1350   if (sz1 > sz2) {
1351     *flg = PETSC_FALSE;
1352     PetscFunctionReturn(PETSC_SUCCESS);
1353   }
1354 
1355   PetscCall(ISGetIndices(isrow, &ptr1));
1356   PetscCall(ISGetIndices(iscol_local, &ptr2));
1357 
1358   PetscCall(PetscMalloc1(sz1, &a1));
1359   PetscCall(PetscMalloc1(sz2, &a2));
1360   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1361   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1362   PetscCall(PetscSortInt(sz1, a1));
1363   PetscCall(PetscSortInt(sz2, a2));
1364 
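  /* count matches with a single forward scan: both arrays are sorted, so the search for the next entry of a1 can resume at the index of the previous match */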
1365   nmatch = 0;
1366   k      = 0;
1367   for (i = 0; i < sz1; i++) {
1368     for (j = k; j < sz2; j++) {
1369       if (a1[i] == a2[j]) {
1370         k = j;
1371         nmatch++;
1372         break;
1373       }
1374     }
1375   }
1376   PetscCall(ISRestoreIndices(isrow, &ptr1));
1377   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1378   PetscCall(PetscFree(a1));
1379   PetscCall(PetscFree(a2));
1380   if (nmatch < sz1) {
1381     *flg = PETSC_FALSE;
1382   } else {
1383     *flg = PETSC_TRUE;
1384   }
1385   PetscFunctionReturn(PETSC_SUCCESS);
1386 }
1387 
1388 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1389 {
1390   IS        iscol_local;
1391   PetscInt  csize;
1392   PetscBool isequal;
1393 
1394   PetscFunctionBegin;
1395   PetscCall(ISGetLocalSize(iscol, &csize));
1396   if (call == MAT_REUSE_MATRIX) {
1397     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1398     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1399   } else {
1400     PetscBool issorted;
1401 
1402     PetscCall(ISAllGather(iscol, &iscol_local));
1403     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1404     PetscCall(ISSorted(iscol_local, &issorted));
1405     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1406   }
1407 
1408   /* now call MatCreateSubMatrix_MPIBAIJ() */
1409   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1410   if (call == MAT_INITIAL_MATRIX) {
1411     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1412     PetscCall(ISDestroy(&iscol_local));
1413   }
1414   PetscFunctionReturn(PETSC_SUCCESS);
1415 }
1416 
1417 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1418 {
1419   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatZeroEntries(l->A));
1423   PetscCall(MatZeroEntries(l->B));
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1428 {
1429   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1430   Mat            A = a->A, B = a->B;
1431   PetscLogDouble isend[5], irecv[5];
1432 
1433   PetscFunctionBegin;
1434   info->block_size = (PetscReal)matin->rmap->bs;
1435 
1436   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1437 
1438   isend[0] = info->nz_used;
1439   isend[1] = info->nz_allocated;
1440   isend[2] = info->nz_unneeded;
1441   isend[3] = info->memory;
1442   isend[4] = info->mallocs;
1443 
1444   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1445 
1446   isend[0] += info->nz_used;
1447   isend[1] += info->nz_allocated;
1448   isend[2] += info->nz_unneeded;
1449   isend[3] += info->memory;
1450   isend[4] += info->mallocs;
1451   if (flag == MAT_LOCAL) {
1452     info->nz_used      = isend[0];
1453     info->nz_allocated = isend[1];
1454     info->nz_unneeded  = isend[2];
1455     info->memory       = isend[3];
1456     info->mallocs      = isend[4];
1457   } else if (flag == MAT_GLOBAL_MAX) {
1458     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1459 
1460     info->nz_used      = irecv[0];
1461     info->nz_allocated = irecv[1];
1462     info->nz_unneeded  = irecv[2];
1463     info->memory       = irecv[3];
1464     info->mallocs      = irecv[4];
1465   } else if (flag == MAT_GLOBAL_SUM) {
1466     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1467 
1468     info->nz_used      = irecv[0];
1469     info->nz_allocated = irecv[1];
1470     info->nz_unneeded  = irecv[2];
1471     info->memory       = irecv[3];
1472     info->mallocs      = irecv[4];
1473   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1474   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1475   info->fill_ratio_needed = 0;
1476   info->factor_mallocs    = 0;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1481 {
1482   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1483   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1484 
1485   PetscFunctionBegin;
1486   switch (op) {
1487   case MAT_NEW_NONZERO_LOCATIONS:
1488   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1489   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1490   case MAT_KEEP_NONZERO_PATTERN:
1491   case MAT_SUBMAT_SINGLEIS:
1492   case MAT_NEW_NONZERO_LOCATION_ERR:
1493     MatCheckPreallocated(A, 1);
1494     PetscCall(MatSetOption(a->A, op, flg));
1495     PetscCall(MatSetOption(a->B, op, flg));
1496     break;
1497   case MAT_ROW_ORIENTED:
1498     MatCheckPreallocated(A, 1);
1499     a->roworiented = flg;
1500 
1501     PetscCall(MatSetOption(a->A, op, flg));
1502     PetscCall(MatSetOption(a->B, op, flg));
1503     break;
1504   case MAT_FORCE_DIAGONAL_ENTRIES:
1505   case MAT_SORTED_FULL:
1506     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1507     break;
1508   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509     a->donotstash = flg;
1510     break;
1511   case MAT_USE_HASH_TABLE:
1512     a->ht_flag = flg;
1513     break;
1514   case MAT_HERMITIAN:
1515     MatCheckPreallocated(A, 1);
1516     PetscCall(MatSetOption(a->A, op, flg));
1517 #if defined(PETSC_USE_COMPLEX)
1518     if (flg) { /* need different mat-vec ops */
1519       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1520       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1521       A->ops->multtranspose    = NULL;
1522       A->ops->multtransposeadd = NULL;
1523       A->symmetric             = PETSC_BOOL3_FALSE;
1524     }
1525 #endif
1526     break;
1527   case MAT_SPD:
1528   case MAT_SYMMETRIC:
1529     MatCheckPreallocated(A, 1);
1530     PetscCall(MatSetOption(a->A, op, flg));
1531 #if defined(PETSC_USE_COMPLEX)
1532     if (flg) { /* restore to use default mat-vec ops */
1533       A->ops->mult             = MatMult_MPISBAIJ;
1534       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1535       A->ops->multtranspose    = MatMult_MPISBAIJ;
1536       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1537     }
1538 #endif
1539     break;
1540   case MAT_STRUCTURALLY_SYMMETRIC:
1541     MatCheckPreallocated(A, 1);
1542     PetscCall(MatSetOption(a->A, op, flg));
1543     break;
1544   case MAT_SYMMETRY_ETERNAL:
1545   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1546     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1547     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1548     break;
1549   case MAT_SPD_ETERNAL:
1550     break;
1551   case MAT_IGNORE_LOWER_TRIANGULAR:
1552     aA->ignore_ltriangular = flg;
1553     break;
1554   case MAT_ERROR_LOWER_TRIANGULAR:
1555     aA->ignore_ltriangular = flg;
1556     break;
1557   case MAT_GETROW_UPPERTRIANGULAR:
1558     aA->getrow_utriangular = flg;
1559     break;
1560   default:
1561     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unknown option %d", (int)op);
1562   }
1563   PetscFunctionReturn(PETSC_SUCCESS);
1564 }
1565 
1566 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1567 {
1568   PetscFunctionBegin;
1569   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1570   if (reuse == MAT_INITIAL_MATRIX) {
1571     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1572   } else if (reuse == MAT_REUSE_MATRIX) {
1573     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1574   }
1575   PetscFunctionReturn(PETSC_SUCCESS);
1576 }
1577 
1578 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1579 {
1580   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1581   Mat           a = baij->A, b = baij->B;
1582   PetscInt      nv, m, n;
1583   PetscBool     flg;
1584 
1585   PetscFunctionBegin;
1586   if (ll != rr) {
1587     PetscCall(VecEqual(ll, rr, &flg));
1588     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be the same");
1589   }
1590   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1591 
1592   PetscCall(MatGetLocalSize(mat, &m, &n));
1593   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1594 
1595   PetscCall(VecGetLocalSize(rr, &nv));
1596   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vectors have non-conforming local sizes");
1597 
1598   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1599 
1600   /* left diagonalscale the off-diagonal part */
1601   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1602 
1603   /* scale the diagonal part */
1604   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1605 
1606   /* right diagonalscale the off-diagonal part */
1607   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1608   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613 {
1614   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1615 
1616   PetscFunctionBegin;
1617   PetscCall(MatSetUnfactored(a->A));
1618   PetscFunctionReturn(PETSC_SUCCESS);
1619 }
1620 
1621 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1622 
1623 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1624 {
1625   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1626   Mat           a, b, c, d;
1627   PetscBool     flg;
1628 
1629   PetscFunctionBegin;
1630   a = matA->A;
1631   b = matA->B;
1632   c = matB->A;
1633   d = matB->B;
1634 
1635   PetscCall(MatEqual(a, c, &flg));
1636   if (flg) PetscCall(MatEqual(b, d, &flg));
1637   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1638   PetscFunctionReturn(PETSC_SUCCESS);
1639 }
1640 
1641 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1642 {
1643   PetscBool isbaij;
1644 
1645   PetscFunctionBegin;
1646   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1647   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1648   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1649   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1650     PetscCall(MatGetRowUpperTriangular(A));
1651     PetscCall(MatCopy_Basic(A, B, str));
1652     PetscCall(MatRestoreRowUpperTriangular(A));
1653   } else {
1654     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1655     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1656 
1657     PetscCall(MatCopy(a->A, b->A, str));
1658     PetscCall(MatCopy(a->B, b->B, str));
1659   }
1660   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1661   PetscFunctionReturn(PETSC_SUCCESS);
1662 }
1663 
1664 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1665 {
1666   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1667   PetscBLASInt  bnz, one                          = 1;
1668   Mat_SeqSBAIJ *xa, *ya;
1669   Mat_SeqBAIJ  *xb, *yb;
1670 
1671   PetscFunctionBegin;
1672   if (str == SAME_NONZERO_PATTERN) {
1673     PetscScalar alpha = a;
1674     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1675     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1676     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1677     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1678     xb = (Mat_SeqBAIJ *)xx->B->data;
1679     yb = (Mat_SeqBAIJ *)yy->B->data;
1680     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1681     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1682     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1683   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1684     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1685     PetscCall(MatAXPY_Basic(Y, a, X, str));
1686     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1687   } else {
1688     Mat       B;
1689     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1690     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1691     PetscCall(MatGetRowUpperTriangular(X));
1692     PetscCall(MatGetRowUpperTriangular(Y));
1693     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1694     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1695     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1696     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1697     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1698     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1699     PetscCall(MatSetType(B, MATMPISBAIJ));
1700     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1701     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1702     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1703     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1704     PetscCall(MatHeaderMerge(Y, &B));
1705     PetscCall(PetscFree(nnz_d));
1706     PetscCall(PetscFree(nnz_o));
1707     PetscCall(MatRestoreRowUpperTriangular(X));
1708     PetscCall(MatRestoreRowUpperTriangular(Y));
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1714 {
1715   PetscInt  i;
1716   PetscBool flg;
1717 
1718   PetscFunctionBegin;
1719   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1720   for (i = 0; i < n; i++) {
1721     PetscCall(ISEqual(irow[i], icol[i], &flg));
1722     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1723   }
1724   PetscFunctionReturn(PETSC_SUCCESS);
1725 }
1726 
1727 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1728 {
1729   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1730   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1731 
1732   PetscFunctionBegin;
1733   if (!Y->preallocated) {
1734     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1735   } else if (!aij->nz) {
1736     PetscInt nonew = aij->nonew;
1737     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1738     aij->nonew = nonew;
1739   }
1740   PetscCall(MatShift_Basic(Y, a));
1741   PetscFunctionReturn(PETSC_SUCCESS);
1742 }
1743 
1744 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1745 {
1746   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1747 
1748   PetscFunctionBegin;
1749   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1750   PetscCall(MatMissingDiagonal(a->A, missing, d));
1751   if (d) {
1752     PetscInt rstart;
1753     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1754     *d += rstart / A->rmap->bs;
1755   }
1756   PetscFunctionReturn(PETSC_SUCCESS);
1757 }
1758 
1759 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1760 {
1761   PetscFunctionBegin;
1762   *a = ((Mat_MPISBAIJ *)A->data)->A;
1763   PetscFunctionReturn(PETSC_SUCCESS);
1764 }
1765 
1766 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1767 {
1768   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1769 
1770   PetscFunctionBegin;
1771   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1772   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1777                                        MatGetRow_MPISBAIJ,
1778                                        MatRestoreRow_MPISBAIJ,
1779                                        MatMult_MPISBAIJ,
1780                                        /*  4*/ MatMultAdd_MPISBAIJ,
1781                                        MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose ones */
1782                                        MatMultAdd_MPISBAIJ,
1783                                        NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        /* 10*/ NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        MatSOR_MPISBAIJ,
1790                                        MatTranspose_MPISBAIJ,
1791                                        /* 15*/ MatGetInfo_MPISBAIJ,
1792                                        MatEqual_MPISBAIJ,
1793                                        MatGetDiagonal_MPISBAIJ,
1794                                        MatDiagonalScale_MPISBAIJ,
1795                                        MatNorm_MPISBAIJ,
1796                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1797                                        MatAssemblyEnd_MPISBAIJ,
1798                                        MatSetOption_MPISBAIJ,
1799                                        MatZeroEntries_MPISBAIJ,
1800                                        /* 24*/ NULL,
1801                                        NULL,
1802                                        NULL,
1803                                        NULL,
1804                                        NULL,
1805                                        /* 29*/ MatSetUp_MPI_Hash,
1806                                        NULL,
1807                                        NULL,
1808                                        MatGetDiagonalBlock_MPISBAIJ,
1809                                        NULL,
1810                                        /* 34*/ MatDuplicate_MPISBAIJ,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        /* 39*/ MatAXPY_MPISBAIJ,
1816                                        MatCreateSubMatrices_MPISBAIJ,
1817                                        MatIncreaseOverlap_MPISBAIJ,
1818                                        MatGetValues_MPISBAIJ,
1819                                        MatCopy_MPISBAIJ,
1820                                        /* 44*/ NULL,
1821                                        MatScale_MPISBAIJ,
1822                                        MatShift_MPISBAIJ,
1823                                        NULL,
1824                                        NULL,
1825                                        /* 49*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /* 54*/ NULL,
1831                                        NULL,
1832                                        MatSetUnfactored_MPISBAIJ,
1833                                        NULL,
1834                                        MatSetValuesBlocked_MPISBAIJ,
1835                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /* 64*/ NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1846                                        NULL,
1847                                        MatConvert_MPISBAIJ_Basic,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 74*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        /* 79*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        MatLoad_MPISBAIJ,
1860                                        /* 84*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        /* 89*/ NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        /* 94*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        /* 99*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        MatConjugate_MPISBAIJ,
1879                                        NULL,
1880                                        /*104*/ NULL,
1881                                        MatRealPart_MPISBAIJ,
1882                                        MatImaginaryPart_MPISBAIJ,
1883                                        MatGetRowUpperTriangular_MPISBAIJ,
1884                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1885                                        /*109*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        MatMissingDiagonal_MPISBAIJ,
1890                                        /*114*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        /*119*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /*124*/ NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        /*129*/ NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        /*134*/ NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        NULL,
1915                                        /*139*/ MatSetBlockSizes_Default,
1916                                        NULL,
1917                                        NULL,
1918                                        NULL,
1919                                        NULL,
1920                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1921                                        NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        NULL,
1925                                        NULL,
1926                                        /*150*/ NULL,
1927                                        MatEliminateZeros_MPISBAIJ};
1928 
1929 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1930 {
1931   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1932   PetscInt      i, mbs, Mbs;
1933   PetscMPIInt   size;
1934 
1935   PetscFunctionBegin;
1936   if (B->hash_active) {
1937     B->ops[0]      = b->cops;
1938     B->hash_active = PETSC_FALSE;
1939   }
1940   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1941   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1942   PetscCall(PetscLayoutSetUp(B->rmap));
1943   PetscCall(PetscLayoutSetUp(B->cmap));
1944   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1945   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1946   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1947 
1948   mbs = B->rmap->n / bs;
1949   Mbs = B->rmap->N / bs;
1950   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1951 
1952   B->rmap->bs = bs;
1953   b->bs2      = bs * bs;
1954   b->mbs      = mbs;
1955   b->Mbs      = Mbs;
1956   b->nbs      = B->cmap->n / bs;
1957   b->Nbs      = B->cmap->N / bs;
1958 
1959   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1960   b->rstartbs = B->rmap->rstart / bs;
1961   b->rendbs   = B->rmap->rend / bs;
1962 
1963   b->cstartbs = B->cmap->rstart / bs;
1964   b->cendbs   = B->cmap->rend / bs;
1965 
1966 #if defined(PETSC_USE_CTABLE)
1967   PetscCall(PetscHMapIDestroy(&b->colmap));
1968 #else
1969   PetscCall(PetscFree(b->colmap));
1970 #endif
1971   PetscCall(PetscFree(b->garray));
1972   PetscCall(VecDestroy(&b->lvec));
1973   PetscCall(VecScatterDestroy(&b->Mvctx));
1974   PetscCall(VecDestroy(&b->slvec0));
1975   PetscCall(VecDestroy(&b->slvec0b));
1976   PetscCall(VecDestroy(&b->slvec1));
1977   PetscCall(VecDestroy(&b->slvec1a));
1978   PetscCall(VecDestroy(&b->slvec1b));
1979   PetscCall(VecScatterDestroy(&b->sMvctx));
1980 
1981   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1982   PetscCall(MatDestroy(&b->B));
1983   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1984   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1985   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1986 
1987   PetscCall(MatDestroy(&b->A));
1988   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1989   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1990   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1991 
1992   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1993   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1994 
1995   B->preallocated  = PETSC_TRUE;
1996   B->was_assembled = PETSC_FALSE;
1997   B->assembled     = PETSC_FALSE;
1998   PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000 
2001 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2002 {
2003   PetscInt        m, rstart, cend;
2004   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2005   const PetscInt *JJ          = NULL;
2006   PetscScalar    *values      = NULL;
2007   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2008   PetscBool       nooffprocentries;
2009 
2010   PetscFunctionBegin;
2011   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2012   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2013   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2014   PetscCall(PetscLayoutSetUp(B->rmap));
2015   PetscCall(PetscLayoutSetUp(B->cmap));
2016   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2017   m      = B->rmap->n / bs;
2018   rstart = B->rmap->rstart / bs;
2019   cend   = B->cmap->rend / bs;
2020 
2021   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2022   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2023   for (i = 0; i < m; i++) {
2024     nz = ii[i + 1] - ii[i];
2025     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2026     /* count the block columns on the diagonal and above, split into diagonal and off-diagonal portions; the bd entries below the diagonal are dropped from the preallocation counts */
2027     JJ = jj + ii[i];
2028     bd = 0;
2029     for (j = 0; j < nz; j++) {
2030       if (*JJ >= i + rstart) break;
2031       JJ++;
2032       bd++;
2033     }
2034     d = 0;
2035     for (; j < nz; j++) {
2036       if (*JJ++ >= cend) break;
2037       d++;
2038     }
2039     d_nnz[i] = d;
2040     o_nnz[i] = nz - d - bd;
2041     nz       = nz - bd;
2042     nz_max   = PetscMax(nz_max, nz);
2043   }
2044   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2045   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2046   PetscCall(PetscFree2(d_nnz, o_nnz));
2047 
2048   values = (PetscScalar *)V;
2049   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2050   for (i = 0; i < m; i++) {
2051     PetscInt        row   = i + rstart;
2052     PetscInt        ncols = ii[i + 1] - ii[i];
2053     const PetscInt *icols = jj + ii[i];
2054     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2055       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2056       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2057     } else { /* block ordering does not match so we can only insert one block at a time. */
2058       PetscInt j;
2059       for (j = 0; j < ncols; j++) {
2060         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2061         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2062       }
2063     }
2064   }
2065 
2066   if (!V) PetscCall(PetscFree(values));
2067   nooffprocentries    = B->nooffprocentries;
2068   B->nooffprocentries = PETSC_TRUE;
2069   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2070   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2071   B->nooffprocentries = nooffprocentries;
2072 
2073   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2074   PetscFunctionReturn(PETSC_SUCCESS);
2075 }
2076 
2077 /*MC
2078    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2079    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2080    the matrix is stored.
2081 
2082    For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric,
2083    call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); see the example below.
2084 
2085    Options Database Key:
2086 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2087 
2088    Level: beginner
2089 
2090    Note:
2091      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2092      diagonal portion of the matrix on each process must be less than or equal to its number of columns.
2093 
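   Example Usage:
   A minimal sketch (the sizes and per-row counts are illustrative) that creates an MPISBAIJ matrix and, for complex scalars, marks it Hermitian:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL));
   PetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE)); /* only meaningful for complex scalars */
.ve
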
2094 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2095 M*/
2096 
2097 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2098 {
2099   Mat_MPISBAIJ *b;
2100   PetscBool     flg = PETSC_FALSE;
2101 
2102   PetscFunctionBegin;
2103   PetscCall(PetscNew(&b));
2104   B->data   = (void *)b;
2105   B->ops[0] = MatOps_Values;
2106 
2107   B->ops->destroy = MatDestroy_MPISBAIJ;
2108   B->ops->view    = MatView_MPISBAIJ;
2109   B->assembled    = PETSC_FALSE;
2110   B->insertmode   = NOT_SET_VALUES;
2111 
2112   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2113   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2114 
2115   /* build local table of row and column ownerships */
2116   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2117 
2118   /* build cache for off-process entries generated during assembly */
2119   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2120 
2121   b->donotstash  = PETSC_FALSE;
2122   b->colmap      = NULL;
2123   b->garray      = NULL;
2124   b->roworiented = PETSC_TRUE;
2125 
2126   /* stuff used in block assembly */
2127   b->barray = NULL;
2128 
2129   /* stuff used for matrix vector multiply */
2130   b->lvec    = NULL;
2131   b->Mvctx   = NULL;
2132   b->slvec0  = NULL;
2133   b->slvec0b = NULL;
2134   b->slvec1  = NULL;
2135   b->slvec1a = NULL;
2136   b->slvec1b = NULL;
2137   b->sMvctx  = NULL;
2138 
2139   /* stuff for MatGetRow() */
2140   b->rowindices   = NULL;
2141   b->rowvalues    = NULL;
2142   b->getrowactive = PETSC_FALSE;
2143 
2144   /* hash table stuff */
2145   b->ht           = NULL;
2146   b->hd           = NULL;
2147   b->ht_size      = 0;
2148   b->ht_flag      = PETSC_FALSE;
2149   b->ht_fact      = 0;
2150   b->ht_total_ct  = 0;
2151   b->ht_insert_ct = 0;
2152 
2153   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2154   b->ijonly = PETSC_FALSE;
2155 
2156   b->in_loc = NULL;
2157   b->v_loc  = NULL;
2158   b->n_loc  = 0;
2159 
2160   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2161   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2162   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2163   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2164 #if defined(PETSC_HAVE_ELEMENTAL)
2165   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2166 #endif
2167 #if defined(PETSC_HAVE_SCALAPACK)
2168   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2169 #endif
2170   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2171   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2172 
2173   B->symmetric                   = PETSC_BOOL3_TRUE;
2174   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2175   B->symmetry_eternal            = PETSC_TRUE;
2176   B->structural_symmetry_eternal = PETSC_TRUE;
2177 #if defined(PETSC_USE_COMPLEX)
2178   B->hermitian = PETSC_BOOL3_FALSE;
2179 #else
2180   B->hermitian = PETSC_BOOL3_TRUE;
2181 #endif
2182 
2183   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2184   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2185   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2186   if (flg) {
2187     PetscReal fact = 1.39;
2188     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2189     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2190     if (fact <= 1.0) fact = 1.39;
2191     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2192     PetscCall(PetscInfo(B, "Hash table factor used %5.2g\n", (double)fact));
2193   }
2194   PetscOptionsEnd();
2195   PetscFunctionReturn(PETSC_SUCCESS);
2196 }
2197 
2198 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2199 /*MC
2200    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2201 
2202    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2203    and `MATMPISBAIJ` otherwise.
2204 
2205    Options Database Key:
2206 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2207 
2208   Level: beginner
2209 
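   Example Usage:
   A minimal sketch of the usual create/set-type/options paradigm; on one rank this type resolves to `MATSEQSBAIJ`, otherwise to `MATMPISBAIJ`:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATSBAIJ));
   PetscCall(MatSetFromOptions(A)); /* honors -mat_type sbaij and related options */
.ve
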
2210 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2211 M*/
2212 
2213 /*@C
2214   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2215   the user should preallocate the matrix storage by setting the parameters
2216   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2217   performance can be increased by more than a factor of 50.
2218 
2219   Collective
2220 
2221   Input Parameters:
2222 + B     - the matrix
2223 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2224           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2225 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2226            submatrix (same for all local rows)
2227 . d_nnz - array containing the number of block nonzeros in the various block rows
2228            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2229            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2230            for the diagonal entry and set a value even if it is zero.
2231 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2232            submatrix (same for all local rows).
2233 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2234            off-diagonal portion of the local submatrix that is to the right of the diagonal
2235            (possibly different for each block row) or `NULL`.
2236 
2237   Options Database Keys:
2238 + -mat_no_unroll  - uses code that does not unroll the loops in the
2239                      block calculations (much slower)
2240 - -mat_block_size - size of the blocks to use
2241 
2242   Level: intermediate
2243 
2244   Notes:
2245 
2246   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2247   then it must be used on all processors that share the object for that argument.
2248 
2249   If the *_nnz parameter is given then the *_nz parameter is ignored.
2250 
2251   Storage Information:
2252   For a square global matrix we define each processor's diagonal portion
2253   to be its local rows and the corresponding columns (a square submatrix);
2254   each processor's off-diagonal portion encompasses the remainder of the
2255   local matrix (a rectangular submatrix).
2256 
2257   The user can specify preallocated storage for the diagonal part of
2258   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2259   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2260   memory allocation.  Likewise, specify preallocated storage for the
2261   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2262 
2263   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2264   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2265   You can also run with the option `-info` and look for messages with the string
2266   malloc in them to see if additional memory allocation was needed.
2267 
2268   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2269   the figure below we depict these three local rows and all columns (0-11).
2270 
2271 .vb
2272            0 1 2 3 4 5 6 7 8 9 10 11
2273           --------------------------
2274    row 3  |. . . d d d o o o o  o  o
2275    row 4  |. . . d d d o o o o  o  o
2276    row 5  |. . . d d d o o o o  o  o
2277           --------------------------
2278 .ve
2279 
2280   Thus, any entries in the d locations are stored in the d (diagonal)
2281   submatrix, and any entries in the o locations are stored in the
2282   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2283   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2284 
2285   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2286   plus the diagonal part of the d matrix,
2287   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2288 
2289   In general, for PDE problems in which most nonzeros are near the diagonal,
2290   one expects `d_nz` >> `o_nz`.
2291 
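   Example Usage:
   A minimal sketch (the block size and counts are illustrative) for a matrix with bs = 2 and 3 local block rows, where each
   block row is expected to hold 3 block nonzeros in the upper triangle of the d part and 1 in the o part:
.vb
   PetscInt d_nnz[3] = {3, 3, 3}, o_nnz[3] = {1, 1, 1};
   PetscCall(MatMPISBAIJSetPreallocation(B, 2, 0, d_nnz, 0, o_nnz));
.ve
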
2292 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2293 @*/
2294 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2295 {
2296   PetscFunctionBegin;
2297   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2298   PetscValidType(B, 1);
2299   PetscValidLogicalCollectiveInt(B, bs, 2);
2300   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2301   PetscFunctionReturn(PETSC_SUCCESS);
2302 }
2303 
2304 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2305 /*@C
2306   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2307   (block compressed row).  For good matrix assembly performance
2308   the user should preallocate the matrix storage by setting the parameters
2309   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2310 
2311   Collective
2312 
2313   Input Parameters:
2314 + comm  - MPI communicator
2315 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2316           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2317 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
2318            This value should be the same as the local size used in creating the
2319            y vector for the matrix-vector product y = Ax.
2320 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
2321            This value should be the same as the local size used in creating the
2322            x vector for the matrix-vector product y = Ax.
2323 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2324 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2325 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2326            submatrix (same for all local rows)
2327 . d_nnz - array containing the number of block nonzeros in the various block rows
2328            in the upper triangular portion of the diagonal portion of the local submatrix
2329            (possibly different for each block row) or `NULL`.
2330            If you plan to factor the matrix you must leave room for the diagonal entry and
2331            set its value even if it is zero.
2332 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2333            submatrix (same for all local rows).
2334 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2335            off-diagonal portion of the local submatrix (possibly different for
2336            each block row) or `NULL`.
2337 
2338   Output Parameter:
2339 . A - the matrix
2340 
2341   Options Database Keys:
2342 + -mat_no_unroll  - uses code that does not unroll the loops in the
2343                      block calculations (much slower)
2344 . -mat_block_size - size of the blocks to use
2345 - -mat_mpi        - use the parallel matrix data structures even on one processor
2346                (defaults to using the `MATSEQSBAIJ` format on one processor)
2347 
2348   Level: intermediate
2349 
2350   Notes:
2351   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2352   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2353   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2354 
2355   The number of rows and columns must be divisible by blocksize.
2356   This matrix type does not support complex Hermitian operations.
2357 
2358   The user MUST specify either the local or global matrix dimensions
2359   (possibly both).
2360 
2361   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2362   then it must be used on all processors that share the object for that argument.
2363 
2364   If the *_nnz parameter is given then the *_nz parameter is ignored.
2365 
2366   Storage Information:
2367   For a square global matrix we define each processor's diagonal portion
2368   to be its local rows and the corresponding columns (a square submatrix);
2369   each processor's off-diagonal portion encompasses the remainder of the
2370   local matrix (a rectangular submatrix).
2371 
2372   The user can specify preallocated storage for the diagonal part of
2373   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2374   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2375   memory allocation. Likewise, specify preallocated storage for the
2376   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2377 
2378   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2379   the figure below we depict these three local rows and all columns (0-11).
2380 
2381 .vb
2382            0 1 2 3 4 5 6 7 8 9 10 11
2383           --------------------------
2384    row 3  |. . . d d d o o o o  o  o
2385    row 4  |. . . d d d o o o o  o  o
2386    row 5  |. . . d d d o o o o  o  o
2387           --------------------------
2388 .ve
2389 
2390   Thus, any entries in the d locations are stored in the d (diagonal)
2391   submatrix, and any entries in the o locations are stored in the
2392   o (off-diagonal) submatrix. Note that the d matrix is stored in
2393   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2394 
2395   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2396   plus the diagonal part of the d matrix,
2397   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2398   In general, for PDE problems in which most nonzeros are near the diagonal,
2399   one expects `d_nz` >> `o_nz`.
2400 
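   Example Usage:
   A minimal sketch (all sizes illustrative): a 100x100 matrix with 2x2 blocks, letting PETSc pick the local sizes and
   using constant per-row estimates instead of exact counts:
.vb
   Mat A;
   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A));
.ve
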
2401 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2402 @*/
2403 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2404 {
2405   PetscMPIInt size;
2406 
2407   PetscFunctionBegin;
2408   PetscCall(MatCreate(comm, A));
2409   PetscCall(MatSetSizes(*A, m, n, M, N));
2410   PetscCallMPI(MPI_Comm_size(comm, &size));
2411   if (size > 1) {
2412     PetscCall(MatSetType(*A, MATMPISBAIJ));
2413     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2414   } else {
2415     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2416     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2417   }
2418   PetscFunctionReturn(PETSC_SUCCESS);
2419 }
2420 
2421 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2422 {
2423   Mat           mat;
2424   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2425   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2426   PetscScalar  *array;
2427 
2428   PetscFunctionBegin;
2429   *newmat = NULL;
2430 
2431   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2432   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2433   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2434   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2435   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2436 
2437   mat->factortype   = matin->factortype;
2438   mat->preallocated = PETSC_TRUE;
2439   mat->assembled    = PETSC_TRUE;
2440   mat->insertmode   = NOT_SET_VALUES;
2441 
2442   a      = (Mat_MPISBAIJ *)mat->data;
2443   a->bs2 = oldmat->bs2;
2444   a->mbs = oldmat->mbs;
2445   a->nbs = oldmat->nbs;
2446   a->Mbs = oldmat->Mbs;
2447   a->Nbs = oldmat->Nbs;
2448 
2449   a->size         = oldmat->size;
2450   a->rank         = oldmat->rank;
2451   a->donotstash   = oldmat->donotstash;
2452   a->roworiented  = oldmat->roworiented;
2453   a->rowindices   = NULL;
2454   a->rowvalues    = NULL;
2455   a->getrowactive = PETSC_FALSE;
2456   a->barray       = NULL;
2457   a->rstartbs     = oldmat->rstartbs;
2458   a->rendbs       = oldmat->rendbs;
2459   a->cstartbs     = oldmat->cstartbs;
2460   a->cendbs       = oldmat->cendbs;
2461 
2462   /* hash table stuff */
2463   a->ht           = NULL;
2464   a->hd           = NULL;
2465   a->ht_size      = 0;
2466   a->ht_flag      = oldmat->ht_flag;
2467   a->ht_fact      = oldmat->ht_fact;
2468   a->ht_total_ct  = 0;
2469   a->ht_insert_ct = 0;
2470 
2471   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2472   if (oldmat->colmap) {
2473 #if defined(PETSC_USE_CTABLE)
2474     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2475 #else
2476     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2477     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2478 #endif
2479   } else a->colmap = NULL;
2480 
2481   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2482     PetscCall(PetscMalloc1(len, &a->garray));
2483     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2484   } else a->garray = NULL;
2485 
2486   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2487   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2488   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2489 
2490   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2491   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2492 
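  /* slvec1a and slvec1b are views into slvec1's storage: the first bs*mbs entries hold the local (diagonal-block) part and the remainder the off-process part; slvec0b similarly views the tail of slvec0 */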
2493   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2494   PetscCall(VecGetArray(a->slvec1, &array));
2495   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2496   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2497   PetscCall(VecRestoreArray(a->slvec1, &array));
2498   PetscCall(VecGetArray(a->slvec0, &array));
2499   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2500   PetscCall(VecRestoreArray(a->slvec0, &array));
2501 
2502   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; use the lazy trick instead: reference the original scatter */
2503   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2504   a->sMvctx = oldmat->sMvctx;
2505 
2506   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2507   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2508   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2509   *newmat = mat;
2510   PetscFunctionReturn(PETSC_SUCCESS);
2511 }
2512 
2513 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2514 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2515 
2516 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2517 {
2518   PetscBool isbinary;
2519 
2520   PetscFunctionBegin;
2521   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2522   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2523   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2524   PetscFunctionReturn(PETSC_SUCCESS);
2525 }
2526 
2527 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2528 {
2529   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2530   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2531   PetscReal     atmp;
2532   PetscReal    *work, *svalues, *rvalues;
2533   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2534   PetscMPIInt   rank, size;
2535   PetscInt     *rowners_bs, dest, count, source;
2536   PetscScalar  *va;
2537   MatScalar    *ba;
2538   MPI_Status    stat;
2539 
2540   PetscFunctionBegin;
2541   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning the column indices of the row maxima is not supported; send email to petsc-maint@mcs.anl.gov");
2542   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2543   PetscCall(VecGetArray(v, &va));
2544 
2545   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2546   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2547 
2548   bs  = A->rmap->bs;
2549   mbs = a->mbs;
2550   Mbs = a->Mbs;
2551   ba  = b->a;
2552   bi  = b->i;
2553   bj  = b->j;
2554 
2555   /* find ownerships */
2556   rowners_bs = A->rmap->range;
2557 
2558   /* each proc creates an array to be distributed */
2559   PetscCall(PetscCalloc1(bs * Mbs, &work));
2560 
2561   /* row_max for B: only the upper triangle is stored, so each block of B also contributes, by symmetry, to rows owned by later ranks; accumulate those contributions per global column in work[] and send each owner its slice */
2562   if (rank != size - 1) {
2563     for (i = 0; i < mbs; i++) {
2564       ncols = bi[1] - bi[0];
2565       bi++;
2566       brow = bs * i;
2567       for (j = 0; j < ncols; j++) {
2568         bcol = bs * (*bj);
2569         for (kcol = 0; kcol < bs; kcol++) {
2570           col = bcol + kcol;           /* local col index */
2571           col += rowners_bs[rank + 1]; /* global col index */
2572           for (krow = 0; krow < bs; krow++) {
2573             atmp = PetscAbsScalar(*ba);
2574             ba++;
2575             row = brow + krow; /* local row index */
2576             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2577             if (work[col] < atmp) work[col] = atmp;
2578           }
2579         }
2580         bj++;
2581       }
2582     }
2583 
2584     /* send values to their owners */
2585     for (dest = rank + 1; dest < size; dest++) {
2586       svalues = work + rowners_bs[dest];
2587       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2588       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2589     }
2590   }
2591 
2592   /* receive values */
2593   if (rank) {
2594     rvalues = work;
2595     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2596     for (source = 0; source < rank; source++) {
2597       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2598       /* process values */
2599       for (i = 0; i < count; i++) {
2600         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2601       }
2602     }
2603   }
2604 
2605   PetscCall(VecRestoreArray(v, &va));
2606   PetscCall(PetscFree(work));
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
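
/*
   A minimal usage sketch (not part of the original source); A is assumed to be an assembled
   MATMPISBAIJ matrix. The idx argument must be NULL for this format, as checked above.

     Vec v;
     PetscCall(MatCreateVecs(A, NULL, &v));    left vector: v has the row layout of A
     PetscCall(MatGetRowMaxAbs(A, v, NULL));   v[i] = max_j |A(i,j)|
     PetscCall(VecDestroy(&v));
*/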
2609 
2610 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2611 {
2612   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2613   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2614   PetscScalar       *x, *ptr, *from;
2615   Vec                bb1;
2616   const PetscScalar *b;
2617 
2618   PetscFunctionBegin;
2619   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2620   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2621 
2622   if (flag == SOR_APPLY_UPPER) {
2623     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2624     PetscFunctionReturn(PETSC_SUCCESS);
2625   }
2626 
2627   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2628     if (flag & SOR_ZERO_INITIAL_GUESS) {
2629       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2630       its--;
2631     }
2632 
2633     PetscCall(VecDuplicate(bb, &bb1));
2634     while (its--) {
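      /* slvec0 and slvec1 are split as [slvec0a | slvec0b] and [slvec1a | slvec1b], sharing
         storage with their sub-vectors. After the ADD_VALUES forward scatter below, slvec1a
         holds bb minus the lower-triangular (B^T) contributions received from other ranks and
         slvec1b holds the ghost values of -x, so the multadd subtracts the upper-triangular
         part and bb1 is ready for the local diagonal sweep. */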
2635       /* lower triangular part: slvec0b = - B^T*xx */
2636       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2637 
2638       /* copy xx into slvec0a */
2639       PetscCall(VecGetArray(mat->slvec0, &ptr));
2640       PetscCall(VecGetArray(xx, &x));
2641       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2642       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2643 
2644       PetscCall(VecScale(mat->slvec0, -1.0));
2645 
2646       /* copy bb into slvec1a */
2647       PetscCall(VecGetArray(mat->slvec1, &ptr));
2648       PetscCall(VecGetArrayRead(bb, &b));
2649       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2650       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2651 
2652       /* set slvec1b = 0 */
2653       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2654       PetscCall(VecZeroEntries(mat->slvec1b));
2655 
2656       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2657       PetscCall(VecRestoreArray(xx, &x));
2658       PetscCall(VecRestoreArrayRead(bb, &b));
2659       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2660 
2661       /* upper triangular part: bb1 = bb1 - B*x */
2662       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2663 
2664       /* local diagonal sweep */
2665       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2666     }
2667     PetscCall(VecDestroy(&bb1));
2668   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2669     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2670   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2671     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2672   } else if (flag & SOR_EISENSTAT) {
2673     Vec                xx1;
2674     PetscBool          hasop;
2675     const PetscScalar *diag;
2676     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2677     PetscInt           i, n;
2678 
2679     if (!mat->xx1) {
2680       PetscCall(VecDuplicate(bb, &mat->xx1));
2681       PetscCall(VecDuplicate(bb, &mat->bb1));
2682     }
2683     xx1 = mat->xx1;
2684     bb1 = mat->bb1;
2685 
2686     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2687 
2688     if (!mat->diag) {
2689       /* this is wrong for same matrix with new nonzero values */
2690       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2691       PetscCall(MatGetDiagonal(matin, mat->diag));
2692     }
2693     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2694 
2695     if (hasop) {
2696       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2697       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2698     } else {
2699       /*
2700           The following two calls are replaced by the hand-unrolled loops below, which may be a bit faster with a good compiler:
2701       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2702       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2703       */
2704       PetscCall(VecGetArray(mat->slvec1a, &sl));
2705       PetscCall(VecGetArrayRead(mat->diag, &diag));
2706       PetscCall(VecGetArrayRead(bb, &b));
2707       PetscCall(VecGetArray(xx, &x));
2708       PetscCall(VecGetLocalSize(xx, &n));
2709       if (omega == 1.0) {
2710         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2711         PetscCall(PetscLogFlops(2.0 * n));
2712       } else {
2713         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2714         PetscCall(PetscLogFlops(3.0 * n));
2715       }
2716       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2717       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2718       PetscCall(VecRestoreArrayRead(bb, &b));
2719       PetscCall(VecRestoreArray(xx, &x));
2720     }
2721 
2722     /* multiply off-diagonal portion of matrix */
2723     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2724     PetscCall(VecZeroEntries(mat->slvec1b));
2725     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2726     PetscCall(VecGetArray(mat->slvec0, &from));
2727     PetscCall(VecGetArray(xx, &x));
2728     PetscCall(PetscArraycpy(from, x, bs * mbs));
2729     PetscCall(VecRestoreArray(mat->slvec0, &from));
2730     PetscCall(VecRestoreArray(xx, &x));
2731     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2732     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2733     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2734 
2735     /* local sweep */
2736     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2737     PetscCall(VecAXPY(xx, 1.0, xx1));
2738   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2739   PetscFunctionReturn(PETSC_SUCCESS);
2740 }
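
/*
   A minimal usage sketch (not part of the original source): MatSOR() is normally reached
   through the PCSOR preconditioner rather than called directly; A, b, and x below are assumed
   to be an assembled MATMPISBAIJ matrix (bs = 1, the only supported block size) and
   conforming vectors.

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSOR));
     PetscCall(PCSORSetSymmetric(pc, SOR_LOCAL_SYMMETRIC_SWEEP));   the sweep handled above
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/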
2741 
2742 /*@
2743   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local
2744   rows in standard CSR format.
2745 
2746   Collective
2747 
2748   Input Parameters:
2749 + comm - MPI communicator
2750 . bs   - the block size (only a block size of 1 is supported)
2751 . m    - number of local rows (cannot be `PETSC_DECIDE`)
2752 . n    - number of local columns; this should be the same as the local size used in creating the
2753        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2754        calculated if `N` is given). For square matrices `n` is almost always `m`.
2755 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2756 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2757 . i    - row index array; that is, i[0] = 0 and i[row] = i[row-1] + number of blocks in block row row-1 of the matrix
2758 . j    - column indices
2759 - a    - matrix values
2760 
2761   Output Parameter:
2762 . mat - the matrix
2763 
2764   Level: intermediate
2765 
2766   Notes:
2767   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2768   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2769   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2770 
2771   The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.
2772 
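  Example usage (a minimal sketch, run on one MPI rank; the 2 by 2 system is illustrative only):
.vb
  PetscInt    i[] = {0, 2, 3};
  PetscInt    j[] = {0, 1, 1};
  PetscScalar a[] = {4.0, -1.0, 4.0};
  Mat         A;

  MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve
  Here row 0 holds the upper-triangular entries (0,0) = 4 and (0,1) = -1, and row 1 holds (1,1) = 4.
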
2773 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2774           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2775 @*/
2776 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2777 {
2778   PetscFunctionBegin;
2779   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2780   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
2781   PetscCall(MatCreate(comm, mat));
2782   PetscCall(MatSetSizes(*mat, m, n, M, N));
2783   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2784   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2785   PetscFunctionReturn(PETSC_SUCCESS);
2786 }
2787 
2788 /*@C
2789   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2790 
2791   Collective
2792 
2793   Input Parameters:
2794 + B  - the matrix
2795 . bs - the block size
2796 . i  - the indices into `j` for the start of each local row (starts with zero)
2797 . j  - the column indices for each local row (starts with zero); these must be sorted within each row
2798 - v  - optional values in the matrix
2799 
2800   Level: advanced
2801 
2802   Notes:
2803   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2804   and usually the numerical values as well
2805 
2806   Any entries below the diagonal are ignored
2807 
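  Example usage (a minimal sketch, run on one MPI rank): with bs = 2, each group of bs*bs = 4
  entries of `v` provides one dense block; symmetric blocks are used here so the ordering of
  values within a block does not matter:
.vb
  Mat         B;
  PetscInt    i[] = {0, 2, 3};
  PetscInt    j[] = {0, 1, 1};
  PetscScalar v[] = {4.0, 0.0, 0.0, 4.0, -1.0, 0.0, 0.0, -1.0, 4.0, 0.0, 0.0, 4.0};

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(B, MATMPISBAIJ);
  MatMPISBAIJSetPreallocationCSR(B, 2, i, j, v);
.ve
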
2808 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2809 @*/
2810 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2811 {
2812   PetscFunctionBegin;
2813   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2814   PetscFunctionReturn(PETSC_SUCCESS);
2815 }
2816 
2817 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2818 {
2819   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2820   PetscInt    *indx;
2821   PetscScalar *values;
2822 
2823   PetscFunctionBegin;
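  /* The symbolic phase (MAT_INITIAL_MATRIX) computes the block preallocation of the stacked
     matrix from each rank's sequential matrix; the numeric phase then copies the rows with
     MatSetValues(), shifted by this rank's block-row offset. */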
2824   PetscCall(MatGetSize(inmat, &m, &N));
2825   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2826     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2827     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2828     PetscInt     *bindx, rmax = a->rmax, j;
2829     PetscMPIInt   rank, size;
2830 
2831     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2832     mbs = m / bs;
2833     Nbs = N / cbs;
2834     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2835     nbs = n / cbs;
2836 
2837     PetscCall(PetscMalloc1(rmax, &bindx));
2838     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2839 
2840     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2841     PetscCallMPI(MPI_Comm_size(comm, &size));
2842     if (rank == size - 1) {
2843       /* Check sum(nbs) = Nbs */
2844       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2845     }
2846 
2847     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2848     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2849     for (i = 0; i < mbs; i++) {
2850       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2851       nnz = nnz / bs;
2852       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2853       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2854       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2855     }
2856     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2857     PetscCall(PetscFree(bindx));
2858 
2859     PetscCall(MatCreate(comm, outmat));
2860     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2861     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2862     PetscCall(MatSetType(*outmat, MATSBAIJ));
2863     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2864     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2865     MatPreallocateEnd(dnz, onz);
2866   }
2867 
2868   /* numeric phase */
2869   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2870   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2871 
2872   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2873   for (i = 0; i < m; i++) {
2874     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2875     Ii = i + rstart;
2876     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2877     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2878   }
2879   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2880   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2881   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2882   PetscFunctionReturn(PETSC_SUCCESS);
2883 }
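
/*
   A minimal usage sketch (not part of the original source): the routine above backs the
   type-independent MatCreateMPIMatConcatenateSeqMat() interface, which stacks one sequential
   matrix per rank into a single parallel matrix; seq below is assumed to be an assembled
   MATSEQSBAIJ matrix on each rank.

     Mat mpi;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpi));
     PetscCall(MatDestroy(&mpi));
*/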
2884