xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b6d8efd844103bbe70b66cc4fbc58f90317efaeb)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11 #if defined(PETSC_USE_LOG)
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 #endif
14   PetscCall(MatStashDestroy_Private(&mat->stash));
15   PetscCall(MatStashDestroy_Private(&mat->bstash));
16   PetscCall(MatDestroy(&baij->A));
17   PetscCall(MatDestroy(&baij->B));
18 #if defined(PETSC_USE_CTABLE)
19   PetscCall(PetscHMapIDestroy(&baij->colmap));
20 #else
21   PetscCall(PetscFree(baij->colmap));
22 #endif
23   PetscCall(PetscFree(baij->garray));
24   PetscCall(VecDestroy(&baij->lvec));
25   PetscCall(VecScatterDestroy(&baij->Mvctx));
26   PetscCall(VecDestroy(&baij->slvec0));
27   PetscCall(VecDestroy(&baij->slvec0b));
28   PetscCall(VecDestroy(&baij->slvec1));
29   PetscCall(VecDestroy(&baij->slvec1a));
30   PetscCall(VecDestroy(&baij->slvec1b));
31   PetscCall(VecScatterDestroy(&baij->sMvctx));
32   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33   PetscCall(PetscFree(baij->barray));
34   PetscCall(PetscFree(baij->hd));
35   PetscCall(VecDestroy(&baij->diag));
36   PetscCall(VecDestroy(&baij->bb1));
37   PetscCall(VecDestroy(&baij->xx1));
38 #if defined(PETSC_USE_REAL_MAT_SINGLE)
39   PetscCall(PetscFree(baij->setvaluescopy));
40 #endif
41   PetscCall(PetscFree(baij->in_loc));
42   PetscCall(PetscFree(baij->v_loc));
43   PetscCall(PetscFree(baij->rangebs));
44   PetscCall(PetscFree(mat->data));
45 
46   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51 #if defined(PETSC_HAVE_ELEMENTAL)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53 #endif
54 #if defined(PETSC_HAVE_SCALAPACK)
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56 #endif
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63 #define TYPE SBAIJ
64 #define TYPE_SBAIJ
65 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66 #undef TYPE
67 #undef TYPE_SBAIJ
68 
69 #if defined(PETSC_HAVE_ELEMENTAL)
70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71 #endif
72 #if defined(PETSC_HAVE_SCALAPACK)
73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74 #endif
75 
76 /* This could be moved to matimpl.h */
77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78 {
79   Mat       preallocator;
80   PetscInt  r, rstart, rend;
81   PetscInt  bs, i, m, n, M, N;
82   PetscBool cong = PETSC_TRUE;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86   PetscValidLogicalCollectiveInt(B, nm, 2);
87   for (i = 0; i < nm; i++) {
88     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
89     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91   }
92   PetscValidLogicalCollectiveBool(B, fill, 5);
93   PetscCall(MatGetBlockSize(B, &bs));
94   PetscCall(MatGetSize(B, &M, &N));
95   PetscCall(MatGetLocalSize(B, &m, &n));
96   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98   PetscCall(MatSetBlockSize(preallocator, bs));
99   PetscCall(MatSetSizes(preallocator, m, n, M, N));
100   PetscCall(MatSetUp(preallocator));
101   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102   for (r = rstart; r < rend; ++r) {
103     PetscInt           ncols;
104     const PetscInt    *row;
105     const PetscScalar *vals;
106 
107     for (i = 0; i < nm; i++) {
108       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
111       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112     }
113   }
114   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
117   PetscCall(MatDestroy(&preallocator));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122 {
123   Mat      B;
124   PetscInt r;
125 
126   PetscFunctionBegin;
127   if (reuse != MAT_REUSE_MATRIX) {
128     PetscBool symm = PETSC_TRUE, isdense;
129     PetscInt  bs;
130 
131     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133     PetscCall(MatSetType(B, newtype));
134     PetscCall(MatGetBlockSize(A, &bs));
135     PetscCall(MatSetBlockSize(B, bs));
136     PetscCall(PetscLayoutSetUp(B->rmap));
137     PetscCall(PetscLayoutSetUp(B->cmap));
138     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139     if (!isdense) {
140       PetscCall(MatGetRowUpperTriangular(A));
141       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142       PetscCall(MatRestoreRowUpperTriangular(A));
143     } else {
144       PetscCall(MatSetUp(B));
145     }
146   } else {
147     B = *newmat;
148     PetscCall(MatZeroEntries(B));
149   }
150 
151   PetscCall(MatGetRowUpperTriangular(A));
152   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153     PetscInt           ncols;
154     const PetscInt    *row;
155     const PetscScalar *vals;
156 
157     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159 #if defined(PETSC_USE_COMPLEX)
160     if (A->hermitian == PETSC_BOOL3_TRUE) {
161       PetscInt i;
162       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163     } else {
164       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165     }
166 #else
167     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168 #endif
169     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170   }
171   PetscCall(MatRestoreRowUpperTriangular(A));
172   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174 
175   if (reuse == MAT_INPLACE_MATRIX) {
176     PetscCall(MatHeaderReplace(A, &B));
177   } else {
178     *newmat = B;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184 {
185   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186 
187   PetscFunctionBegin;
188   PetscCall(MatStoreValues(aij->A));
189   PetscCall(MatStoreValues(aij->B));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194 {
195   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196 
197   PetscFunctionBegin;
198   PetscCall(MatRetrieveValues(aij->A));
199   PetscCall(MatRetrieveValues(aij->B));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204   { \
205     brow = row / bs; \
206     rp   = aj + ai[brow]; \
207     ap   = aa + bs2 * ai[brow]; \
208     rmax = aimax[brow]; \
209     nrow = ailen[brow]; \
210     bcol = col / bs; \
211     ridx = row % bs; \
212     cidx = col % bs; \
213     low  = 0; \
214     high = nrow; \
215     while (high - low > 3) { \
216       t = (low + high) / 2; \
217       if (rp[t] > bcol) high = t; \
218       else low = t; \
219     } \
220     for (_i = low; _i < high; _i++) { \
221       if (rp[_i] > bcol) break; \
222       if (rp[_i] == bcol) { \
223         bap = ap + bs2 * _i + bs * cidx + ridx; \
224         if (addv == ADD_VALUES) *bap += value; \
225         else *bap = value; \
226         goto a_noinsert; \
227       } \
228     } \
229     if (a->nonew == 1) goto a_noinsert; \
230     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232     N = nrow++ - 1; \
233     /* shift up all the later entries in this row */ \
234     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237     rp[_i]                          = bcol; \
238     ap[bs2 * _i + bs * cidx + ridx] = value; \
239     A->nonzerostate++; \
240   a_noinsert:; \
241     ailen[brow] = nrow; \
242   }
243 
244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245   { \
246     brow = row / bs; \
247     rp   = bj + bi[brow]; \
248     ap   = ba + bs2 * bi[brow]; \
249     rmax = bimax[brow]; \
250     nrow = bilen[brow]; \
251     bcol = col / bs; \
252     ridx = row % bs; \
253     cidx = col % bs; \
254     low  = 0; \
255     high = nrow; \
256     while (high - low > 3) { \
257       t = (low + high) / 2; \
258       if (rp[t] > bcol) high = t; \
259       else low = t; \
260     } \
261     for (_i = low; _i < high; _i++) { \
262       if (rp[_i] > bcol) break; \
263       if (rp[_i] == bcol) { \
264         bap = ap + bs2 * _i + bs * cidx + ridx; \
265         if (addv == ADD_VALUES) *bap += value; \
266         else *bap = value; \
267         goto b_noinsert; \
268       } \
269     } \
270     if (b->nonew == 1) goto b_noinsert; \
271     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273     N = nrow++ - 1; \
274     /* shift up all the later entries in this row */ \
275     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278     rp[_i]                          = bcol; \
279     ap[bs2 * _i + bs * cidx + ridx] = value; \
280     B->nonzerostate++; \
281   b_noinsert:; \
282     bilen[brow] = nrow; \
283   }
284 
285 /* Only add/insert a(i,j) with i<=j (blocks).
286    Any a(i,j) with i>j input by user is ignored or generates an error
287 */
288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289 {
290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291   MatScalar     value;
292   PetscBool     roworiented = baij->roworiented;
293   PetscInt      i, j, row, col;
294   PetscInt      rstart_orig = mat->rmap->rstart;
295   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297 
298   /* Some Variables required in the macro */
299   Mat           A     = baij->A;
300   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
301   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302   MatScalar    *aa = a->a;
303 
304   Mat          B     = baij->B;
305   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
306   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307   MatScalar   *ba = b->a;
308 
309   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
310   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311   MatScalar *ap, *bap;
312 
313   /* for stash */
314   PetscInt   n_loc, *in_loc = NULL;
315   MatScalar *v_loc = NULL;
316 
317   PetscFunctionBegin;
318   if (!baij->donotstash) {
319     if (n > baij->n_loc) {
320       PetscCall(PetscFree(baij->in_loc));
321       PetscCall(PetscFree(baij->v_loc));
322       PetscCall(PetscMalloc1(n, &baij->in_loc));
323       PetscCall(PetscMalloc1(n, &baij->v_loc));
324 
325       baij->n_loc = n;
326     }
327     in_loc = baij->in_loc;
328     v_loc  = baij->v_loc;
329   }
330 
331   for (i = 0; i < m; i++) {
332     if (im[i] < 0) continue;
333     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335       row = im[i] - rstart_orig;                     /* local row index */
336       for (j = 0; j < n; j++) {
337         if (im[i] / bs > in[j] / bs) {
338           if (a->ignore_ltriangular) {
339             continue; /* ignore lower triangular blocks */
340           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341         }
342         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343           col  = in[j] - cstart_orig;                    /* local col index */
344           brow = row / bs;
345           bcol = col / bs;
346           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347           if (roworiented) value = v[i * n + j];
348           else value = v[i + j * m];
349           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351         } else if (in[j] < 0) {
352           continue;
353         } else {
354           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355           /* off-diag entry (B) */
356           if (mat->was_assembled) {
357             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358 #if defined(PETSC_USE_CTABLE)
359             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360             col = col - 1;
361 #else
362             col = baij->colmap[in[j] / bs] - 1;
363 #endif
364             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
365               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366               col = in[j];
367               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368               B     = baij->B;
369               b     = (Mat_SeqBAIJ *)(B)->data;
370               bimax = b->imax;
371               bi    = b->i;
372               bilen = b->ilen;
373               bj    = b->j;
374               ba    = b->a;
375             } else col += in[j] % bs;
376           } else col = in[j];
377           if (roworiented) value = v[i * n + j];
378           else value = v[i + j * m];
379           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381         }
382       }
383     } else { /* off processor entry */
384       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385       if (!baij->donotstash) {
386         mat->assembled = PETSC_FALSE;
387         n_loc          = 0;
388         for (j = 0; j < n; j++) {
389           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390           in_loc[n_loc] = in[j];
391           if (roworiented) {
392             v_loc[n_loc] = v[i * n + j];
393           } else {
394             v_loc[n_loc] = v[j * m + i];
395           }
396           n_loc++;
397         }
398         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399       }
400     }
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406 {
407   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
408   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411   PetscBool          roworiented = a->roworiented;
412   const PetscScalar *value       = v;
413   MatScalar         *ap, *aa = a->a, *bap;
414 
415   PetscFunctionBegin;
416   if (col < row) {
417     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419   }
420   rp    = aj + ai[row];
421   ap    = aa + bs2 * ai[row];
422   rmax  = imax[row];
423   nrow  = ailen[row];
424   value = v;
425   low   = 0;
426   high  = nrow;
427 
428   while (high - low > 7) {
429     t = (low + high) / 2;
430     if (rp[t] > col) high = t;
431     else low = t;
432   }
433   for (i = low; i < high; i++) {
434     if (rp[i] > col) break;
435     if (rp[i] == col) {
436       bap = ap + bs2 * i;
437       if (roworiented) {
438         if (is == ADD_VALUES) {
439           for (ii = 0; ii < bs; ii++) {
440             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441           }
442         } else {
443           for (ii = 0; ii < bs; ii++) {
444             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445           }
446         }
447       } else {
448         if (is == ADD_VALUES) {
449           for (ii = 0; ii < bs; ii++) {
450             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451           }
452         } else {
453           for (ii = 0; ii < bs; ii++) {
454             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455           }
456         }
457       }
458       goto noinsert2;
459     }
460   }
461   if (nonew == 1) goto noinsert2;
462   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464   N = nrow++ - 1;
465   high++;
466   /* shift up all the later entries in this row */
467   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469   rp[i] = col;
470   bap   = ap + bs2 * i;
471   if (roworiented) {
472     for (ii = 0; ii < bs; ii++) {
473       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474     }
475   } else {
476     for (ii = 0; ii < bs; ii++) {
477       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478     }
479   }
480 noinsert2:;
481   ailen[row] = nrow;
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 /*
486    This routine is exactly duplicated in mpibaij.c
487 */
488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
491   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494   PetscBool          roworiented = a->roworiented;
495   const PetscScalar *value       = v;
496   MatScalar         *ap, *aa = a->a, *bap;
497 
498   PetscFunctionBegin;
499   rp    = aj + ai[row];
500   ap    = aa + bs2 * ai[row];
501   rmax  = imax[row];
502   nrow  = ailen[row];
503   low   = 0;
504   high  = nrow;
505   value = v;
506   while (high - low > 7) {
507     t = (low + high) / 2;
508     if (rp[t] > col) high = t;
509     else low = t;
510   }
511   for (i = low; i < high; i++) {
512     if (rp[i] > col) break;
513     if (rp[i] == col) {
514       bap = ap + bs2 * i;
515       if (roworiented) {
516         if (is == ADD_VALUES) {
517           for (ii = 0; ii < bs; ii++) {
518             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519           }
520         } else {
521           for (ii = 0; ii < bs; ii++) {
522             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523           }
524         }
525       } else {
526         if (is == ADD_VALUES) {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529             bap += bs;
530           }
531         } else {
532           for (ii = 0; ii < bs; ii++, value += bs) {
533             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534             bap += bs;
535           }
536         }
537       }
538       goto noinsert2;
539     }
540   }
541   if (nonew == 1) goto noinsert2;
542   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544   N = nrow++ - 1;
545   high++;
546   /* shift up all the later entries in this row */
547   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549   rp[i] = col;
550   bap   = ap + bs2 * i;
551   if (roworiented) {
552     for (ii = 0; ii < bs; ii++) {
553       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554     }
555   } else {
556     for (ii = 0; ii < bs; ii++) {
557       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558     }
559   }
560 noinsert2:;
561   ailen[row] = nrow;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 /*
566     This routine could be optimized by removing the need for the block copy below and passing stride information
567   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568 */
569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570 {
571   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572   const MatScalar *value;
573   MatScalar       *barray      = baij->barray;
574   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
578 
579   PetscFunctionBegin;
580   if (!barray) {
581     PetscCall(PetscMalloc1(bs2, &barray));
582     baij->barray = barray;
583   }
584 
585   if (roworiented) {
586     stepval = (n - 1) * bs;
587   } else {
588     stepval = (m - 1) * bs;
589   }
590   for (i = 0; i < m; i++) {
591     if (im[i] < 0) continue;
592     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593     if (im[i] >= rstart && im[i] < rend) {
594       row = im[i] - rstart;
595       for (j = 0; j < n; j++) {
596         if (im[i] > in[j]) {
597           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599         }
600         /* If NumCol = 1 then a copy is not required */
601         if ((roworiented) && (n == 1)) {
602           barray = (MatScalar *)v + i * bs2;
603         } else if ((!roworiented) && (m == 1)) {
604           barray = (MatScalar *)v + j * bs2;
605         } else { /* Here a copy is required */
606           if (roworiented) {
607             value = v + i * (stepval + bs) * bs + j * bs;
608           } else {
609             value = v + j * (stepval + bs) * bs + i * bs;
610           }
611           for (ii = 0; ii < bs; ii++, value += stepval) {
612             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613           }
614           barray -= bs2;
615         }
616 
617         if (in[j] >= cstart && in[j] < cend) {
618           col = in[j] - cstart;
619           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620         } else if (in[j] < 0) {
621           continue;
622         } else {
623           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624           if (mat->was_assembled) {
625             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
626 
627 #if defined(PETSC_USE_DEBUG)
628   #if defined(PETSC_USE_CTABLE)
629             {
630               PetscInt data;
631               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633             }
634   #else
635             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636   #endif
637 #endif
638 #if defined(PETSC_USE_CTABLE)
639             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640             col = (col - 1) / bs;
641 #else
642             col = (baij->colmap[in[j]] - 1) / bs;
643 #endif
644             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645               PetscCall(MatDisAssemble_MPISBAIJ(mat));
646               col = in[j];
647             }
648           } else col = in[j];
649           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650         }
651       }
652     } else {
653       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654       if (!baij->donotstash) {
655         if (roworiented) {
656           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         } else {
658           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659         }
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667 {
668   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671 
672   PetscFunctionBegin;
673   for (i = 0; i < m; i++) {
674     if (idxm[i] < 0) continue; /* negative row */
675     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677       row = idxm[i] - bsrstart;
678       for (j = 0; j < n; j++) {
679         if (idxn[j] < 0) continue; /* negative column */
680         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682           col = idxn[j] - bscstart;
683           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684         } else {
685           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686 #if defined(PETSC_USE_CTABLE)
687           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688           data--;
689 #else
690           data = baij->colmap[idxn[j] / bs] - 1;
691 #endif
692           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693           else {
694             col = data + idxn[j] % bs;
695             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696           }
697         }
698       }
699     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705 {
706   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707   PetscReal     sum[2], *lnorm2;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, norm));
712   } else {
713     if (type == NORM_FROBENIUS) {
714       PetscCall(PetscMalloc1(2, &lnorm2));
715       PetscCall(MatNorm(baij->A, type, lnorm2));
716       *lnorm2 = (*lnorm2) * (*lnorm2);
717       lnorm2++; /* squar power of norm(A) */
718       PetscCall(MatNorm(baij->B, type, lnorm2));
719       *lnorm2 = (*lnorm2) * (*lnorm2);
720       lnorm2--; /* squar power of norm(B) */
721       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
723       PetscCall(PetscFree(lnorm2));
724     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
725       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
727       PetscReal    *rsum, *rsum2, vabs;
728       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730       MatScalar    *v;
731 
732       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734       /* Amat */
735       v  = amat->a;
736       jj = amat->j;
737       for (brow = 0; brow < mbs; brow++) {
738         grow = bs * (rstart + brow);
739         nz   = amat->i[brow + 1] - amat->i[brow];
740         for (bcol = 0; bcol < nz; bcol++) {
741           gcol = bs * (rstart + *jj);
742           jj++;
743           for (col = 0; col < bs; col++) {
744             for (row = 0; row < bs; row++) {
745               vabs = PetscAbsScalar(*v);
746               v++;
747               rsum[gcol + col] += vabs;
748               /* non-diagonal block */
749               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
750             }
751           }
752         }
753         PetscCall(PetscLogFlops(nz * bs * bs));
754       }
755       /* Bmat */
756       v  = bmat->a;
757       jj = bmat->j;
758       for (brow = 0; brow < mbs; brow++) {
759         grow = bs * (rstart + brow);
760         nz   = bmat->i[brow + 1] - bmat->i[brow];
761         for (bcol = 0; bcol < nz; bcol++) {
762           gcol = bs * garray[*jj];
763           jj++;
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               vabs = PetscAbsScalar(*v);
767               v++;
768               rsum[gcol + col] += vabs;
769               rsum[grow + row] += vabs;
770             }
771           }
772         }
773         PetscCall(PetscLogFlops(nz * bs * bs));
774       }
775       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776       *norm = 0.0;
777       for (col = 0; col < mat->cmap->N; col++) {
778         if (rsum2[col] > *norm) *norm = rsum2[col];
779       }
780       PetscCall(PetscFree2(rsum, rsum2));
781     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782   }
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   PetscInt      nstash, reallocs;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793 
794   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
799   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800   PetscFunctionReturn(PETSC_SUCCESS);
801 }
802 
803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804 {
805   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
807   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808   PetscInt     *row, *col;
809   PetscBool     other_disassembled;
810   PetscMPIInt   n;
811   PetscBool     r1, r2, r3;
812   MatScalar    *val;
813 
814   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815   PetscFunctionBegin;
816   if (!baij->donotstash && !mat->nooffprocentries) {
817     while (1) {
818       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819       if (!flg) break;
820 
821       for (i = 0; i < n;) {
822         /* Now identify the consecutive vals belonging to the same row */
823         for (j = i, rstart = row[j]; j < n; j++) {
824           if (row[j] != rstart) break;
825         }
826         if (j < n) ncols = j - i;
827         else ncols = n - i;
828         /* Now assemble all these values with a single function call */
829         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830         i = j;
831       }
832     }
833     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834     /* Now process the block-stash. Since the values are stashed column-oriented,
835        set the roworiented flag to column oriented, and after MatSetValues()
836        restore the original flags */
837     r1 = baij->roworiented;
838     r2 = a->roworiented;
839     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
840 
841     baij->roworiented = PETSC_FALSE;
842     a->roworiented    = PETSC_FALSE;
843 
844     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845     while (1) {
846       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847       if (!flg) break;
848 
849       for (i = 0; i < n;) {
850         /* Now identify the consecutive vals belonging to the same row */
851         for (j = i, rstart = row[j]; j < n; j++) {
852           if (row[j] != rstart) break;
853         }
854         if (j < n) ncols = j - i;
855         else ncols = n - i;
856         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857         i = j;
858       }
859     }
860     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
861 
862     baij->roworiented = r1;
863     a->roworiented    = r2;
864 
865     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866   }
867 
868   PetscCall(MatAssemblyBegin(baij->A, mode));
869   PetscCall(MatAssemblyEnd(baij->A, mode));
870 
871   /* determine if any processor has disassembled, if so we must
872      also disassemble ourselves, in order that we may reassemble. */
873   /*
874      if nonzero structure of submatrix B cannot change then we know that
875      no processor disassembled thus we can skip this stuff
876   */
877   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
878     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
879     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880   }
881 
882   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
883   PetscCall(MatAssemblyBegin(baij->B, mode));
884   PetscCall(MatAssemblyEnd(baij->B, mode));
885 
886   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
887 
888   baij->rowvalues = NULL;
889 
890   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
891   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
893     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899 #include <petscdraw.h>
900 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901 {
902   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903   PetscInt          bs   = mat->rmap->bs;
904   PetscMPIInt       rank = baij->rank;
905   PetscBool         iascii, isdraw;
906   PetscViewer       sviewer;
907   PetscViewerFormat format;
908 
909   PetscFunctionBegin;
910   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
911   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
912   if (iascii) {
913     PetscCall(PetscViewerGetFormat(viewer, &format));
914     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915       MatInfo info;
916       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
917       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
918       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
919       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
920                                                    mat->rmap->bs, (double)info.memory));
921       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
924       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
925       PetscCall(PetscViewerFlush(viewer));
926       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
927       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
928       PetscCall(VecScatterView(baij->Mvctx, viewer));
929       PetscFunctionReturn(PETSC_SUCCESS);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
932       PetscFunctionReturn(PETSC_SUCCESS);
933     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
934       PetscFunctionReturn(PETSC_SUCCESS);
935     }
936   }
937 
938   if (isdraw) {
939     PetscDraw draw;
940     PetscBool isnull;
941     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
942     PetscCall(PetscDrawIsNull(draw, &isnull));
943     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944   }
945 
946   {
947     /* assemble the entire matrix onto first processor. */
948     Mat           A;
949     Mat_SeqSBAIJ *Aloc;
950     Mat_SeqBAIJ  *Bloc;
951     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952     MatScalar    *a;
953     const char   *matname;
954 
955     /* Should this be the same type as mat? */
956     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957     if (rank == 0) {
958       PetscCall(MatSetSizes(A, M, N, M, N));
959     } else {
960       PetscCall(MatSetSizes(A, 0, 0, M, N));
961     }
962     PetscCall(MatSetType(A, MATMPISBAIJ));
963     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
964     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
968     ai   = Aloc->i;
969     aj   = Aloc->j;
970     a    = Aloc->a;
971     PetscCall(PetscMalloc1(bs, &rvals));
972 
973     for (i = 0; i < mbs; i++) {
974       rvals[0] = bs * (baij->rstartbs + i);
975       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976       for (j = ai[i]; j < ai[i + 1]; j++) {
977         col = (baij->cstartbs + aj[j]) * bs;
978         for (k = 0; k < bs; k++) {
979           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980           col++;
981           a += bs;
982         }
983       }
984     }
985     /* copy over the B part */
986     Bloc = (Mat_SeqBAIJ *)baij->B->data;
987     ai   = Bloc->i;
988     aj   = Bloc->j;
989     a    = Bloc->a;
990     for (i = 0; i < mbs; i++) {
991       rvals[0] = bs * (baij->rstartbs + i);
992       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993       for (j = ai[i]; j < ai[i + 1]; j++) {
994         col = baij->garray[aj[j]] * bs;
995         for (k = 0; k < bs; k++) {
996           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
997           col++;
998           a += bs;
999         }
1000       }
1001     }
1002     PetscCall(PetscFree(rvals));
1003     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1004     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005     /*
1006        Everyone has to call to draw the matrix since the graphics waits are
1007        synchronized across all processors that share the PetscDraw object
1008     */
1009     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1010     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011     if (rank == 0) {
1012       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1013       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014     }
1015     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1016     PetscCall(PetscViewerFlush(viewer));
1017     PetscCall(MatDestroy(&A));
1018   }
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024 
1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026 {
1027   PetscBool iascii, isdraw, issocket, isbinary;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034   if (iascii || isdraw || issocket) {
1035     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041 {
1042   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1044   PetscScalar       *from;
1045   const PetscScalar *x;
1046 
1047   PetscFunctionBegin;
1048   /* diagonal part */
1049   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050   PetscCall(VecSet(a->slvec1b, 0.0));
1051 
1052   /* subdiagonal part */
1053   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055 
1056   /* copy x into the vec slvec0 */
1057   PetscCall(VecGetArray(a->slvec0, &from));
1058   PetscCall(VecGetArrayRead(xx, &x));
1059 
1060   PetscCall(PetscArraycpy(from, x, bs * mbs));
1061   PetscCall(VecRestoreArray(a->slvec0, &from));
1062   PetscCall(VecRestoreArrayRead(xx, &x));
1063 
1064   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066   /* supperdiagonal part */
1067   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075   PetscScalar       *from;
1076   const PetscScalar *x;
1077 
1078   PetscFunctionBegin;
1079   /* diagonal part */
1080   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081   PetscCall(VecSet(a->slvec1b, 0.0));
1082 
1083   /* subdiagonal part */
1084   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1085 
1086   /* copy x into the vec slvec0 */
1087   PetscCall(VecGetArray(a->slvec0, &from));
1088   PetscCall(VecGetArrayRead(xx, &x));
1089 
1090   PetscCall(PetscArraycpy(from, x, bs * mbs));
1091   PetscCall(VecRestoreArray(a->slvec0, &from));
1092   PetscCall(VecRestoreArrayRead(xx, &x));
1093 
1094   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1095   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096   /* supperdiagonal part */
1097   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1102 {
1103   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1104   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1105   PetscScalar       *from, zero       = 0.0;
1106   const PetscScalar *x;
1107 
1108   PetscFunctionBegin;
1109   /* diagonal part */
1110   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1111   PetscCall(VecSet(a->slvec1b, zero));
1112 
1113   /* subdiagonal part */
1114   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1115   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1116 
1117   /* copy x into the vec slvec0 */
1118   PetscCall(VecGetArray(a->slvec0, &from));
1119   PetscCall(VecGetArrayRead(xx, &x));
1120   PetscCall(PetscArraycpy(from, x, bs * mbs));
1121   PetscCall(VecRestoreArray(a->slvec0, &from));
1122 
1123   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1124   PetscCall(VecRestoreArrayRead(xx, &x));
1125   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1126 
1127   /* supperdiagonal part */
1128   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133 {
1134   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1135   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1136   PetscScalar       *from, zero       = 0.0;
1137   const PetscScalar *x;
1138 
1139   PetscFunctionBegin;
1140   /* diagonal part */
1141   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1142   PetscCall(VecSet(a->slvec1b, zero));
1143 
1144   /* subdiagonal part */
1145   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1146 
1147   /* copy x into the vec slvec0 */
1148   PetscCall(VecGetArray(a->slvec0, &from));
1149   PetscCall(VecGetArrayRead(xx, &x));
1150   PetscCall(PetscArraycpy(from, x, bs * mbs));
1151   PetscCall(VecRestoreArray(a->slvec0, &from));
1152 
1153   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1154   PetscCall(VecRestoreArrayRead(xx, &x));
1155   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1156 
1157   /* supperdiagonal part */
1158   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1167 {
1168   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1172   PetscCall(MatGetDiagonal(a->A, v));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1177 {
1178   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1179 
1180   PetscFunctionBegin;
1181   PetscCall(MatScale(a->A, aa));
1182   PetscCall(MatScale(a->B, aa));
1183   PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185 
1186 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1187 {
1188   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1189   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1190   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1191   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1192   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1193 
1194   PetscFunctionBegin;
1195   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1196   mat->getrowactive = PETSC_TRUE;
1197 
1198   if (!mat->rowvalues && (idx || v)) {
1199     /*
1200         allocate enough space to hold information from the longest row.
1201     */
1202     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1203     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1204     PetscInt      max = 1, mbs = mat->mbs, tmp;
1205     for (i = 0; i < mbs; i++) {
1206       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1207       if (max < tmp) max = tmp;
1208     }
1209     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1210   }
1211 
1212   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1213   lrow = row - brstart; /* local row index */
1214 
1215   pvA = &vworkA;
1216   pcA = &cworkA;
1217   pvB = &vworkB;
1218   pcB = &cworkB;
1219   if (!v) {
1220     pvA = NULL;
1221     pvB = NULL;
1222   }
1223   if (!idx) {
1224     pcA = NULL;
1225     if (!v) pcB = NULL;
1226   }
1227   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1228   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1229   nztot = nzA + nzB;
1230 
1231   cmap = mat->garray;
1232   if (v || idx) {
1233     if (nztot) {
1234       /* Sort by increasing column numbers, assuming A and B already sorted */
1235       PetscInt imark = -1;
1236       if (v) {
1237         *v = v_p = mat->rowvalues;
1238         for (i = 0; i < nzB; i++) {
1239           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1240           else break;
1241         }
1242         imark = i;
1243         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1244         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1245       }
1246       if (idx) {
1247         *idx = idx_p = mat->rowindices;
1248         if (imark > -1) {
1249           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250         } else {
1251           for (i = 0; i < nzB; i++) {
1252             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253             else break;
1254           }
1255           imark = i;
1256         }
1257         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1258         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259       }
1260     } else {
1261       if (idx) *idx = NULL;
1262       if (v) *v = NULL;
1263     }
1264   }
1265   *nz = nztot;
1266   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1267   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1272 {
1273   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1274 
1275   PetscFunctionBegin;
1276   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1277   baij->getrowactive = PETSC_FALSE;
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_TRUE;
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1293   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1294 
1295   PetscFunctionBegin;
1296   aA->getrow_utriangular = PETSC_FALSE;
1297   PetscFunctionReturn(PETSC_SUCCESS);
1298 }
1299 
1300 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301 {
1302   PetscFunctionBegin;
1303   if (PetscDefined(USE_COMPLEX)) {
1304     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1305 
1306     PetscCall(MatConjugate(a->A));
1307     PetscCall(MatConjugate(a->B));
1308   }
1309   PetscFunctionReturn(PETSC_SUCCESS);
1310 }
1311 
1312 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1315 
1316   PetscFunctionBegin;
1317   PetscCall(MatRealPart(a->A));
1318   PetscCall(MatRealPart(a->B));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1325 
1326   PetscFunctionBegin;
1327   PetscCall(MatImaginaryPart(a->A));
1328   PetscCall(MatImaginaryPart(a->B));
1329   PetscFunctionReturn(PETSC_SUCCESS);
1330 }
1331 
1332 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333    Input: isrow       - distributed(parallel),
1334           iscol_local - locally owned (seq)
1335 */
1336 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1337 {
1338   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1339   const PetscInt *ptr1, *ptr2;
1340 
1341   PetscFunctionBegin;
1342   PetscCall(ISGetLocalSize(isrow, &sz1));
1343   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1344   if (sz1 > sz2) {
1345     *flg = PETSC_FALSE;
1346     PetscFunctionReturn(PETSC_SUCCESS);
1347   }
1348 
1349   PetscCall(ISGetIndices(isrow, &ptr1));
1350   PetscCall(ISGetIndices(iscol_local, &ptr2));
1351 
1352   PetscCall(PetscMalloc1(sz1, &a1));
1353   PetscCall(PetscMalloc1(sz2, &a2));
1354   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1355   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1356   PetscCall(PetscSortInt(sz1, a1));
1357   PetscCall(PetscSortInt(sz2, a2));
1358 
1359   nmatch = 0;
1360   k      = 0;
1361   for (i = 0; i < sz1; i++) {
1362     for (j = k; j < sz2; j++) {
1363       if (a1[i] == a2[j]) {
1364         k = j;
1365         nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow, &ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(PETSC_SUCCESS);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1383 {
1384   IS        iscol_local;
1385   PetscInt  csize;
1386   PetscBool isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol, &csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1392     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol, &iscol_local));
1397     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1422 {
1423   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1424   Mat            A = a->A, B = a->B;
1425   PetscLogDouble isend[5], irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1431 
1432   isend[0] = info->nz_used;
1433   isend[1] = info->nz_allocated;
1434   isend[2] = info->nz_unneeded;
1435   isend[3] = info->memory;
1436   isend[4] = info->mallocs;
1437 
1438   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1439 
1440   isend[0] += info->nz_used;
1441   isend[1] += info->nz_allocated;
1442   isend[2] += info->nz_unneeded;
1443   isend[3] += info->memory;
1444   isend[4] += info->mallocs;
1445   if (flag == MAT_LOCAL) {
1446     info->nz_used      = isend[0];
1447     info->nz_allocated = isend[1];
1448     info->nz_unneeded  = isend[2];
1449     info->memory       = isend[3];
1450     info->mallocs      = isend[4];
1451   } else if (flag == MAT_GLOBAL_MAX) {
1452     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1453 
1454     info->nz_used      = irecv[0];
1455     info->nz_allocated = irecv[1];
1456     info->nz_unneeded  = irecv[2];
1457     info->memory       = irecv[3];
1458     info->mallocs      = irecv[4];
1459   } else if (flag == MAT_GLOBAL_SUM) {
1460     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1461 
1462     info->nz_used      = irecv[0];
1463     info->nz_allocated = irecv[1];
1464     info->nz_unneeded  = irecv[2];
1465     info->memory       = irecv[3];
1466     info->mallocs      = irecv[4];
1467   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1468   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1469   info->fill_ratio_needed = 0;
1470   info->factor_mallocs    = 0;
1471   PetscFunctionReturn(PETSC_SUCCESS);
1472 }
1473 
1474 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1475 {
1476   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1477   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1478 
1479   PetscFunctionBegin;
1480   switch (op) {
1481   case MAT_NEW_NONZERO_LOCATIONS:
1482   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1483   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1484   case MAT_KEEP_NONZERO_PATTERN:
1485   case MAT_SUBMAT_SINGLEIS:
1486   case MAT_NEW_NONZERO_LOCATION_ERR:
1487     MatCheckPreallocated(A, 1);
1488     PetscCall(MatSetOption(a->A, op, flg));
1489     PetscCall(MatSetOption(a->B, op, flg));
1490     break;
1491   case MAT_ROW_ORIENTED:
1492     MatCheckPreallocated(A, 1);
1493     a->roworiented = flg;
1494 
1495     PetscCall(MatSetOption(a->A, op, flg));
1496     PetscCall(MatSetOption(a->B, op, flg));
1497     break;
1498   case MAT_FORCE_DIAGONAL_ENTRIES:
1499   case MAT_SORTED_FULL:
1500     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1501     break;
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503     a->donotstash = flg;
1504     break;
1505   case MAT_USE_HASH_TABLE:
1506     a->ht_flag = flg;
1507     break;
1508   case MAT_HERMITIAN:
1509     MatCheckPreallocated(A, 1);
1510     PetscCall(MatSetOption(a->A, op, flg));
1511 #if defined(PETSC_USE_COMPLEX)
1512     if (flg) { /* need different mat-vec ops */
1513       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1514       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1515       A->ops->multtranspose    = NULL;
1516       A->ops->multtransposeadd = NULL;
1517       A->symmetric             = PETSC_BOOL3_FALSE;
1518     }
1519 #endif
1520     break;
1521   case MAT_SPD:
1522   case MAT_SYMMETRIC:
1523     MatCheckPreallocated(A, 1);
1524     PetscCall(MatSetOption(a->A, op, flg));
1525 #if defined(PETSC_USE_COMPLEX)
1526     if (flg) { /* restore to use default mat-vec ops */
1527       A->ops->mult             = MatMult_MPISBAIJ;
1528       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1529       A->ops->multtranspose    = MatMult_MPISBAIJ;
1530       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1531     }
1532 #endif
1533     break;
1534   case MAT_STRUCTURALLY_SYMMETRIC:
1535     MatCheckPreallocated(A, 1);
1536     PetscCall(MatSetOption(a->A, op, flg));
1537     break;
1538   case MAT_SYMMETRY_ETERNAL:
1539   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1540     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1541     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1542     break;
1543   case MAT_SPD_ETERNAL:
1544     break;
1545   case MAT_IGNORE_LOWER_TRIANGULAR:
1546     aA->ignore_ltriangular = flg;
1547     break;
1548   case MAT_ERROR_LOWER_TRIANGULAR:
1549     aA->ignore_ltriangular = flg;
1550     break;
1551   case MAT_GETROW_UPPERTRIANGULAR:
1552     aA->getrow_utriangular = flg;
1553     break;
1554   default:
1555     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1556   }
1557   PetscFunctionReturn(PETSC_SUCCESS);
1558 }
1559 
1560 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1561 {
1562   PetscFunctionBegin;
1563   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1564   if (reuse == MAT_INITIAL_MATRIX) {
1565     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1566   } else if (reuse == MAT_REUSE_MATRIX) {
1567     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1568   }
1569   PetscFunctionReturn(PETSC_SUCCESS);
1570 }
1571 
1572 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1573 {
1574   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1575   Mat           a = baij->A, b = baij->B;
1576   PetscInt      nv, m, n;
1577   PetscBool     flg;
1578 
1579   PetscFunctionBegin;
1580   if (ll != rr) {
1581     PetscCall(VecEqual(ll, rr, &flg));
1582     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1583   }
1584   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1585 
1586   PetscCall(MatGetLocalSize(mat, &m, &n));
1587   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1588 
1589   PetscCall(VecGetLocalSize(rr, &nv));
1590   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1591 
1592   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1593 
1594   /* left diagonalscale the off-diagonal part */
1595   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596 
1597   /* scale the diagonal part */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   /* right diagonalscale the off-diagonal part */
1601   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1607 {
1608   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatSetUnfactored(a->A));
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1616 
1617 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1618 {
1619   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1620   Mat           a, b, c, d;
1621   PetscBool     flg;
1622 
1623   PetscFunctionBegin;
1624   a = matA->A;
1625   b = matA->B;
1626   c = matB->A;
1627   d = matB->B;
1628 
1629   PetscCall(MatEqual(a, c, &flg));
1630   if (flg) PetscCall(MatEqual(b, d, &flg));
1631   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1636 {
1637   PetscBool isbaij;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1641   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1642   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1643   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1644     PetscCall(MatGetRowUpperTriangular(A));
1645     PetscCall(MatCopy_Basic(A, B, str));
1646     PetscCall(MatRestoreRowUpperTriangular(A));
1647   } else {
1648     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1649     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1650 
1651     PetscCall(MatCopy(a->A, b->A, str));
1652     PetscCall(MatCopy(a->B, b->B, str));
1653   }
1654   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659 {
1660   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1661   PetscBLASInt  bnz, one                          = 1;
1662   Mat_SeqSBAIJ *xa, *ya;
1663   Mat_SeqBAIJ  *xb, *yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1669     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1670     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1672     xb = (Mat_SeqBAIJ *)xx->B->data;
1673     yb = (Mat_SeqBAIJ *)yy->B->data;
1674     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1676     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1678     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1679     PetscCall(MatAXPY_Basic(Y, a, X, str));
1680     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1681   } else {
1682     Mat       B;
1683     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1684     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1685     PetscCall(MatGetRowUpperTriangular(X));
1686     PetscCall(MatGetRowUpperTriangular(Y));
1687     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1688     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1689     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1690     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1691     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1692     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1693     PetscCall(MatSetType(B, MATMPISBAIJ));
1694     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1695     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1696     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1697     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1698     PetscCall(MatHeaderMerge(Y, &B));
1699     PetscCall(PetscFree(nnz_d));
1700     PetscCall(PetscFree(nnz_o));
1701     PetscCall(MatRestoreRowUpperTriangular(X));
1702     PetscCall(MatRestoreRowUpperTriangular(Y));
1703   }
1704   PetscFunctionReturn(PETSC_SUCCESS);
1705 }
1706 
1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708 {
1709   PetscInt  i;
1710   PetscBool flg;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714   for (i = 0; i < n; i++) {
1715     PetscCall(ISEqual(irow[i], icol[i], &flg));
1716     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1717   }
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722 {
1723   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1724   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1725 
1726   PetscFunctionBegin;
1727   if (!Y->preallocated) {
1728     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1729   } else if (!aij->nz) {
1730     PetscInt nonew = aij->nonew;
1731     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732     aij->nonew = nonew;
1733   }
1734   PetscCall(MatShift_Basic(Y, a));
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739 {
1740   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1744   PetscCall(MatMissingDiagonal(a->A, missing, d));
1745   if (d) {
1746     PetscInt rstart;
1747     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1748     *d += rstart / A->rmap->bs;
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754 {
1755   PetscFunctionBegin;
1756   *a = ((Mat_MPISBAIJ *)A->data)->A;
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 /* -------------------------------------------------------------------*/
1761 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1762                                        MatGetRow_MPISBAIJ,
1763                                        MatRestoreRow_MPISBAIJ,
1764                                        MatMult_MPISBAIJ,
1765                                        /*  4*/ MatMultAdd_MPISBAIJ,
1766                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1767                                        MatMultAdd_MPISBAIJ,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        /* 10*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatSOR_MPISBAIJ,
1775                                        MatTranspose_MPISBAIJ,
1776                                        /* 15*/ MatGetInfo_MPISBAIJ,
1777                                        MatEqual_MPISBAIJ,
1778                                        MatGetDiagonal_MPISBAIJ,
1779                                        MatDiagonalScale_MPISBAIJ,
1780                                        MatNorm_MPISBAIJ,
1781                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1782                                        MatAssemblyEnd_MPISBAIJ,
1783                                        MatSetOption_MPISBAIJ,
1784                                        MatZeroEntries_MPISBAIJ,
1785                                        /* 24*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        /* 29*/ MatSetUp_MPI_Hash,
1791                                        NULL,
1792                                        NULL,
1793                                        MatGetDiagonalBlock_MPISBAIJ,
1794                                        NULL,
1795                                        /* 34*/ MatDuplicate_MPISBAIJ,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        /* 39*/ MatAXPY_MPISBAIJ,
1801                                        MatCreateSubMatrices_MPISBAIJ,
1802                                        MatIncreaseOverlap_MPISBAIJ,
1803                                        MatGetValues_MPISBAIJ,
1804                                        MatCopy_MPISBAIJ,
1805                                        /* 44*/ NULL,
1806                                        MatScale_MPISBAIJ,
1807                                        MatShift_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                        /* 49*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        /* 54*/ NULL,
1816                                        NULL,
1817                                        MatSetUnfactored_MPISBAIJ,
1818                                        NULL,
1819                                        MatSetValuesBlocked_MPISBAIJ,
1820                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        /* 64*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1831                                        NULL,
1832                                        MatConvert_MPISBAIJ_Basic,
1833                                        NULL,
1834                                        NULL,
1835                                        /* 74*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /* 79*/ NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        MatLoad_MPISBAIJ,
1845                                        /* 84*/ NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 89*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        /* 94*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        /* 99*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        MatConjugate_MPISBAIJ,
1864                                        NULL,
1865                                        /*104*/ NULL,
1866                                        MatRealPart_MPISBAIJ,
1867                                        MatImaginaryPart_MPISBAIJ,
1868                                        MatGetRowUpperTriangular_MPISBAIJ,
1869                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1870                                        /*109*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        MatMissingDiagonal_MPISBAIJ,
1875                                        /*114*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        /*119*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        /*124*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        /*129*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        /*134*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /*139*/ MatSetBlockSizes_Default,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        /*150*/ NULL,
1912                                        NULL};
1913 
1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915 {
1916   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917   PetscInt      i, mbs, Mbs;
1918   PetscMPIInt   size;
1919 
1920   PetscFunctionBegin;
1921   if (B->hash_active) {
1922     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
1923     B->hash_active = PETSC_FALSE;
1924   }
1925   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1926   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1927   PetscCall(PetscLayoutSetUp(B->rmap));
1928   PetscCall(PetscLayoutSetUp(B->cmap));
1929   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1930   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1931   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1932 
1933   mbs = B->rmap->n / bs;
1934   Mbs = B->rmap->N / bs;
1935   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1936 
1937   B->rmap->bs = bs;
1938   b->bs2      = bs * bs;
1939   b->mbs      = mbs;
1940   b->Mbs      = Mbs;
1941   b->nbs      = B->cmap->n / bs;
1942   b->Nbs      = B->cmap->N / bs;
1943 
1944   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1945   b->rstartbs = B->rmap->rstart / bs;
1946   b->rendbs   = B->rmap->rend / bs;
1947 
1948   b->cstartbs = B->cmap->rstart / bs;
1949   b->cendbs   = B->cmap->rend / bs;
1950 
1951 #if defined(PETSC_USE_CTABLE)
1952   PetscCall(PetscHMapIDestroy(&b->colmap));
1953 #else
1954   PetscCall(PetscFree(b->colmap));
1955 #endif
1956   PetscCall(PetscFree(b->garray));
1957   PetscCall(VecDestroy(&b->lvec));
1958   PetscCall(VecScatterDestroy(&b->Mvctx));
1959   PetscCall(VecDestroy(&b->slvec0));
1960   PetscCall(VecDestroy(&b->slvec0b));
1961   PetscCall(VecDestroy(&b->slvec1));
1962   PetscCall(VecDestroy(&b->slvec1a));
1963   PetscCall(VecDestroy(&b->slvec1b));
1964   PetscCall(VecScatterDestroy(&b->sMvctx));
1965 
1966   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1967   PetscCall(MatDestroy(&b->B));
1968   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1969   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1970   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1971 
1972   PetscCall(MatDestroy(&b->A));
1973   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1974   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1975   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1976 
1977   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1978   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1979 
1980   B->preallocated  = PETSC_TRUE;
1981   B->was_assembled = PETSC_FALSE;
1982   B->assembled     = PETSC_FALSE;
1983   PetscFunctionReturn(PETSC_SUCCESS);
1984 }
1985 
1986 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1987 {
1988   PetscInt        m, rstart, cend;
1989   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1990   const PetscInt *JJ          = NULL;
1991   PetscScalar    *values      = NULL;
1992   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1993   PetscBool       nooffprocentries;
1994 
1995   PetscFunctionBegin;
1996   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1997   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1998   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1999   PetscCall(PetscLayoutSetUp(B->rmap));
2000   PetscCall(PetscLayoutSetUp(B->cmap));
2001   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2002   m      = B->rmap->n / bs;
2003   rstart = B->rmap->rstart / bs;
2004   cend   = B->cmap->rend / bs;
2005 
2006   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2007   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2008   for (i = 0; i < m; i++) {
2009     nz = ii[i + 1] - ii[i];
2010     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2011     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2012     JJ = jj + ii[i];
2013     bd = 0;
2014     for (j = 0; j < nz; j++) {
2015       if (*JJ >= i + rstart) break;
2016       JJ++;
2017       bd++;
2018     }
2019     d = 0;
2020     for (; j < nz; j++) {
2021       if (*JJ++ >= cend) break;
2022       d++;
2023     }
2024     d_nnz[i] = d;
2025     o_nnz[i] = nz - d - bd;
2026     nz       = nz - bd;
2027     nz_max   = PetscMax(nz_max, nz);
2028   }
2029   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2030   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2031   PetscCall(PetscFree2(d_nnz, o_nnz));
2032 
2033   values = (PetscScalar *)V;
2034   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2035   for (i = 0; i < m; i++) {
2036     PetscInt        row   = i + rstart;
2037     PetscInt        ncols = ii[i + 1] - ii[i];
2038     const PetscInt *icols = jj + ii[i];
2039     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2040       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2041       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2042     } else { /* block ordering does not match so we can only insert one block at a time. */
2043       PetscInt j;
2044       for (j = 0; j < ncols; j++) {
2045         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2046         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2047       }
2048     }
2049   }
2050 
2051   if (!V) PetscCall(PetscFree(values));
2052   nooffprocentries    = B->nooffprocentries;
2053   B->nooffprocentries = PETSC_TRUE;
2054   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2055   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2056   B->nooffprocentries = nooffprocentries;
2057 
2058   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2059   PetscFunctionReturn(PETSC_SUCCESS);
2060 }
2061 
2062 /*MC
2063    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2064    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2065    the matrix is stored.
2066 
2067    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2068    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2069 
2070    Options Database Keys:
2071 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2072 
2073    Note:
2074      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2075      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2076 
2077    Level: beginner
2078 
2079 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2080 M*/
2081 
2082 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2083 {
2084   Mat_MPISBAIJ *b;
2085   PetscBool     flg = PETSC_FALSE;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(PetscNew(&b));
2089   B->data = (void *)b;
2090   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2091 
2092   B->ops->destroy = MatDestroy_MPISBAIJ;
2093   B->ops->view    = MatView_MPISBAIJ;
2094   B->assembled    = PETSC_FALSE;
2095   B->insertmode   = NOT_SET_VALUES;
2096 
2097   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2098   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2099 
2100   /* build local table of row and column ownerships */
2101   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2102 
2103   /* build cache for off array entries formed */
2104   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2105 
2106   b->donotstash  = PETSC_FALSE;
2107   b->colmap      = NULL;
2108   b->garray      = NULL;
2109   b->roworiented = PETSC_TRUE;
2110 
2111   /* stuff used in block assembly */
2112   b->barray = NULL;
2113 
2114   /* stuff used for matrix vector multiply */
2115   b->lvec    = NULL;
2116   b->Mvctx   = NULL;
2117   b->slvec0  = NULL;
2118   b->slvec0b = NULL;
2119   b->slvec1  = NULL;
2120   b->slvec1a = NULL;
2121   b->slvec1b = NULL;
2122   b->sMvctx  = NULL;
2123 
2124   /* stuff for MatGetRow() */
2125   b->rowindices   = NULL;
2126   b->rowvalues    = NULL;
2127   b->getrowactive = PETSC_FALSE;
2128 
2129   /* hash table stuff */
2130   b->ht           = NULL;
2131   b->hd           = NULL;
2132   b->ht_size      = 0;
2133   b->ht_flag      = PETSC_FALSE;
2134   b->ht_fact      = 0;
2135   b->ht_total_ct  = 0;
2136   b->ht_insert_ct = 0;
2137 
2138   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2139   b->ijonly = PETSC_FALSE;
2140 
2141   b->in_loc = NULL;
2142   b->v_loc  = NULL;
2143   b->n_loc  = 0;
2144 
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2149 #if defined(PETSC_HAVE_ELEMENTAL)
2150   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2151 #endif
2152 #if defined(PETSC_HAVE_SCALAPACK)
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2154 #endif
2155   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2156   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2157 
2158   B->symmetric                   = PETSC_BOOL3_TRUE;
2159   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2160   B->symmetry_eternal            = PETSC_TRUE;
2161   B->structural_symmetry_eternal = PETSC_TRUE;
2162 #if defined(PETSC_USE_COMPLEX)
2163   B->hermitian = PETSC_BOOL3_FALSE;
2164 #else
2165   B->hermitian = PETSC_BOOL3_TRUE;
2166 #endif
2167 
2168   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2169   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2170   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2171   if (flg) {
2172     PetscReal fact = 1.39;
2173     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2174     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2175     if (fact <= 1.0) fact = 1.39;
2176     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2177     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2178   }
2179   PetscOptionsEnd();
2180   PetscFunctionReturn(PETSC_SUCCESS);
2181 }
2182 
2183 /*MC
2184    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2185 
2186    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2187    and `MATMPISBAIJ` otherwise.
2188 
2189    Options Database Key:
2190 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2191 
2192   Level: beginner
2193 
2194 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2195 M*/
2196 
2197 /*@C
2198    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2199    the user should preallocate the matrix storage by setting the parameters
2200    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2201    performance can be increased by more than a factor of 50.
2202 
2203    Collective
2204 
2205    Input Parameters:
2206 +  B - the matrix
2207 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2208           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2209 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2210            submatrix  (same for all local rows)
2211 .  d_nnz - array containing the number of block nonzeros in the various block rows
2212            in the upper triangular and diagonal part of the in diagonal portion of the local
2213            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2214            for the diagonal entry and set a value even if it is zero.
2215 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2216            submatrix (same for all local rows).
2217 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2218            off-diagonal portion of the local submatrix that is right of the diagonal
2219            (possibly different for each block row) or NULL.
2220 
2221    Options Database Keys:
2222 +   -mat_no_unroll - uses code that does not unroll the loops in the
2223                      block calculations (much slower)
2224 -   -mat_block_size - size of the blocks to use
2225 
2226    Notes:
2227 
2228    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2229    than it must be used on all processors that share the object for that argument.
2230 
2231    If the *_nnz parameter is given then the *_nz parameter is ignored
2232 
2233    Storage Information:
2234    For a square global matrix we define each processor's diagonal portion
2235    to be its local rows and the corresponding columns (a square submatrix);
2236    each processor's off-diagonal portion encompasses the remainder of the
2237    local matrix (a rectangular submatrix).
2238 
2239    The user can specify preallocated storage for the diagonal part of
2240    the local submatrix with either d_nz or d_nnz (not both).  Set
2241    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2242    memory allocation.  Likewise, specify preallocated storage for the
2243    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2244 
2245    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2246    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2247    You can also run with the option -info and look for messages with the string
2248    malloc in them to see if additional memory allocation was needed.
2249 
2250    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2251    the figure below we depict these three local rows and all columns (0-11).
2252 
2253 .vb
2254            0 1 2 3 4 5 6 7 8 9 10 11
2255           --------------------------
2256    row 3  |. . . d d d o o o o  o  o
2257    row 4  |. . . d d d o o o o  o  o
2258    row 5  |. . . d d d o o o o  o  o
2259           --------------------------
2260 .ve
2261 
2262    Thus, any entries in the d locations are stored in the d (diagonal)
2263    submatrix, and any entries in the o locations are stored in the
2264    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2265    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2266 
2267    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2268    plus the diagonal part of the d matrix,
2269    and o_nz should indicate the number of block nonzeros per row in the o matrix
2270 
2271    In general, for PDE problems in which most nonzeros are near the diagonal,
2272    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2273    or you will get TERRIBLE performance; see the users' manual chapter on
2274    matrices.
2275 
2276    Level: intermediate
2277 
2278 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2279 @*/
2280 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2281 {
2282   PetscFunctionBegin;
2283   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2284   PetscValidType(B, 1);
2285   PetscValidLogicalCollectiveInt(B, bs, 2);
2286   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2287   PetscFunctionReturn(PETSC_SUCCESS);
2288 }
2289 
2290 /*@C
2291    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2292    (block compressed row).  For good matrix assembly performance
2293    the user should preallocate the matrix storage by setting the parameters
2294    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2295    performance can be increased by more than a factor of 50.
2296 
2297    Collective
2298 
2299    Input Parameters:
2300 +  comm - MPI communicator
2301 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2302           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2303 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2304            This value should be the same as the local size used in creating the
2305            y vector for the matrix-vector product y = Ax.
2306 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2307            This value should be the same as the local size used in creating the
2308            x vector for the matrix-vector product y = Ax.
2309 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2310 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2311 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2312            submatrix (same for all local rows)
2313 .  d_nnz - array containing the number of block nonzeros in the various block rows
2314            in the upper triangular portion of the in diagonal portion of the local
2315            (possibly different for each block block row) or NULL.
2316            If you plan to factor the matrix you must leave room for the diagonal entry and
2317            set its value even if it is zero.
2318 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2319            submatrix (same for all local rows).
2320 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2321            off-diagonal portion of the local submatrix (possibly different for
2322            each block row) or NULL.
2323 
2324    Output Parameter:
2325 .  A - the matrix
2326 
2327    Options Database Keys:
2328 +   -mat_no_unroll - uses code that does not unroll the loops in the
2329                      block calculations (much slower)
2330 .   -mat_block_size - size of the blocks to use
2331 -   -mat_mpi - use the parallel matrix data structures even on one processor
2332                (defaults to using SeqBAIJ format on one processor)
2333 
2334    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2335    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2336    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2337 
2338    Notes:
2339    The number of rows and columns must be divisible by blocksize.
2340    This matrix type does not support complex Hermitian operation.
2341 
2342    The user MUST specify either the local or global matrix dimensions
2343    (possibly both).
2344 
2345    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2346    than it must be used on all processors that share the object for that argument.
2347 
2348    If the *_nnz parameter is given then the *_nz parameter is ignored
2349 
2350    Storage Information:
2351    For a square global matrix we define each processor's diagonal portion
2352    to be its local rows and the corresponding columns (a square submatrix);
2353    each processor's off-diagonal portion encompasses the remainder of the
2354    local matrix (a rectangular submatrix).
2355 
2356    The user can specify preallocated storage for the diagonal part of
2357    the local submatrix with either d_nz or d_nnz (not both). Set
2358    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2359    memory allocation. Likewise, specify preallocated storage for the
2360    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2361 
2362    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2363    the figure below we depict these three local rows and all columns (0-11).
2364 
2365 .vb
2366            0 1 2 3 4 5 6 7 8 9 10 11
2367           --------------------------
2368    row 3  |. . . d d d o o o o  o  o
2369    row 4  |. . . d d d o o o o  o  o
2370    row 5  |. . . d d d o o o o  o  o
2371           --------------------------
2372 .ve
2373 
2374    Thus, any entries in the d locations are stored in the d (diagonal)
2375    submatrix, and any entries in the o locations are stored in the
2376    o (off-diagonal) submatrix. Note that the d matrix is stored in
2377    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2378 
2379    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2380    plus the diagonal part of the d matrix,
2381    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2382    In general, for PDE problems in which most nonzeros are near the diagonal,
2383    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2384    or you will get TERRIBLE performance; see the users' manual chapter on
2385    matrices.
2386 
2387    Level: intermediate
2388 
2389 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2390 @*/
2391 
2392 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2393 {
2394   PetscMPIInt size;
2395 
2396   PetscFunctionBegin;
2397   PetscCall(MatCreate(comm, A));
2398   PetscCall(MatSetSizes(*A, m, n, M, N));
2399   PetscCallMPI(MPI_Comm_size(comm, &size));
2400   if (size > 1) {
2401     PetscCall(MatSetType(*A, MATMPISBAIJ));
2402     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2403   } else {
2404     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2405     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2406   }
2407   PetscFunctionReturn(PETSC_SUCCESS);
2408 }
2409 
2410 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2411 {
2412   Mat           mat;
2413   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2414   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2415   PetscScalar  *array;
2416 
2417   PetscFunctionBegin;
2418   *newmat = NULL;
2419 
2420   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2421   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2422   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2423   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2424   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2425 
2426   mat->factortype   = matin->factortype;
2427   mat->preallocated = PETSC_TRUE;
2428   mat->assembled    = PETSC_TRUE;
2429   mat->insertmode   = NOT_SET_VALUES;
2430 
2431   a      = (Mat_MPISBAIJ *)mat->data;
2432   a->bs2 = oldmat->bs2;
2433   a->mbs = oldmat->mbs;
2434   a->nbs = oldmat->nbs;
2435   a->Mbs = oldmat->Mbs;
2436   a->Nbs = oldmat->Nbs;
2437 
2438   a->size         = oldmat->size;
2439   a->rank         = oldmat->rank;
2440   a->donotstash   = oldmat->donotstash;
2441   a->roworiented  = oldmat->roworiented;
2442   a->rowindices   = NULL;
2443   a->rowvalues    = NULL;
2444   a->getrowactive = PETSC_FALSE;
2445   a->barray       = NULL;
2446   a->rstartbs     = oldmat->rstartbs;
2447   a->rendbs       = oldmat->rendbs;
2448   a->cstartbs     = oldmat->cstartbs;
2449   a->cendbs       = oldmat->cendbs;
2450 
2451   /* hash table stuff */
2452   a->ht           = NULL;
2453   a->hd           = NULL;
2454   a->ht_size      = 0;
2455   a->ht_flag      = oldmat->ht_flag;
2456   a->ht_fact      = oldmat->ht_fact;
2457   a->ht_total_ct  = 0;
2458   a->ht_insert_ct = 0;
2459 
2460   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2461   if (oldmat->colmap) {
2462 #if defined(PETSC_USE_CTABLE)
2463     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2464 #else
2465     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2466     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2467 #endif
2468   } else a->colmap = NULL;
2469 
2470   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2471     PetscCall(PetscMalloc1(len, &a->garray));
2472     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2473   } else a->garray = NULL;
2474 
2475   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2476   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2477   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2478 
2479   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2480   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2481 
2482   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2483   PetscCall(VecGetArray(a->slvec1, &array));
2484   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2485   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2486   PetscCall(VecRestoreArray(a->slvec1, &array));
2487   PetscCall(VecGetArray(a->slvec0, &array));
2488   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2489   PetscCall(VecRestoreArray(a->slvec0, &array));
2490 
2491   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2492   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2493   a->sMvctx = oldmat->sMvctx;
2494 
2495   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2496   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2497   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2498   *newmat = mat;
2499   PetscFunctionReturn(PETSC_SUCCESS);
2500 }
2501 
2502 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2503 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2504 
2505 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2506 {
2507   PetscBool isbinary;
2508 
2509   PetscFunctionBegin;
2510   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2511   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2512   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2513   PetscFunctionReturn(PETSC_SUCCESS);
2514 }
2515 
2516 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2517 {
2518   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2519   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2520   PetscReal     atmp;
2521   PetscReal    *work, *svalues, *rvalues;
2522   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2523   PetscMPIInt   rank, size;
2524   PetscInt     *rowners_bs, dest, count, source;
2525   PetscScalar  *va;
2526   MatScalar    *ba;
2527   MPI_Status    stat;
2528 
2529   PetscFunctionBegin;
2530   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2531   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2532   PetscCall(VecGetArray(v, &va));
2533 
2534   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2535   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2536 
2537   bs  = A->rmap->bs;
2538   mbs = a->mbs;
2539   Mbs = a->Mbs;
2540   ba  = b->a;
2541   bi  = b->i;
2542   bj  = b->j;
2543 
2544   /* find ownerships */
2545   rowners_bs = A->rmap->range;
2546 
2547   /* each proc creates an array to be distributed */
2548   PetscCall(PetscCalloc1(bs * Mbs, &work));
2549 
2550   /* row_max for B */
2551   if (rank != size - 1) {
2552     for (i = 0; i < mbs; i++) {
2553       ncols = bi[1] - bi[0];
2554       bi++;
2555       brow = bs * i;
2556       for (j = 0; j < ncols; j++) {
2557         bcol = bs * (*bj);
2558         for (kcol = 0; kcol < bs; kcol++) {
2559           col = bcol + kcol;           /* local col index */
2560           col += rowners_bs[rank + 1]; /* global col index */
2561           for (krow = 0; krow < bs; krow++) {
2562             atmp = PetscAbsScalar(*ba);
2563             ba++;
2564             row = brow + krow; /* local row index */
2565             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2566             if (work[col] < atmp) work[col] = atmp;
2567           }
2568         }
2569         bj++;
2570       }
2571     }
2572 
2573     /* send values to its owners */
2574     for (dest = rank + 1; dest < size; dest++) {
2575       svalues = work + rowners_bs[dest];
2576       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2577       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2578     }
2579   }
2580 
2581   /* receive values */
2582   if (rank) {
2583     rvalues = work;
2584     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2585     for (source = 0; source < rank; source++) {
2586       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2587       /* process values */
2588       for (i = 0; i < count; i++) {
2589         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2590       }
2591     }
2592   }
2593 
2594   PetscCall(VecRestoreArray(v, &va));
2595   PetscCall(PetscFree(work));
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2600 {
2601   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2602   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2603   PetscScalar       *x, *ptr, *from;
2604   Vec                bb1;
2605   const PetscScalar *b;
2606 
2607   PetscFunctionBegin;
2608   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2609   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2610 
2611   if (flag == SOR_APPLY_UPPER) {
2612     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2613     PetscFunctionReturn(PETSC_SUCCESS);
2614   }
2615 
2616   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2617     if (flag & SOR_ZERO_INITIAL_GUESS) {
2618       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2619       its--;
2620     }
2621 
2622     PetscCall(VecDuplicate(bb, &bb1));
2623     while (its--) {
2624       /* lower triangular part: slvec0b = - B^T*xx */
2625       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2626 
2627       /* copy xx into slvec0a */
2628       PetscCall(VecGetArray(mat->slvec0, &ptr));
2629       PetscCall(VecGetArray(xx, &x));
2630       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2631       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2632 
2633       PetscCall(VecScale(mat->slvec0, -1.0));
2634 
2635       /* copy bb into slvec1a */
2636       PetscCall(VecGetArray(mat->slvec1, &ptr));
2637       PetscCall(VecGetArrayRead(bb, &b));
2638       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2639       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2640 
2641       /* set slvec1b = 0 */
2642       PetscCall(VecSet(mat->slvec1b, 0.0));
2643 
2644       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2645       PetscCall(VecRestoreArray(xx, &x));
2646       PetscCall(VecRestoreArrayRead(bb, &b));
2647       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2648 
2649       /* upper triangular part: bb1 = bb1 - B*x */
2650       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2651 
2652       /* local diagonal sweep */
2653       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2654     }
2655     PetscCall(VecDestroy(&bb1));
2656   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2657     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2658   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2659     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2660   } else if (flag & SOR_EISENSTAT) {
2661     Vec                xx1;
2662     PetscBool          hasop;
2663     const PetscScalar *diag;
2664     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2665     PetscInt           i, n;
2666 
2667     if (!mat->xx1) {
2668       PetscCall(VecDuplicate(bb, &mat->xx1));
2669       PetscCall(VecDuplicate(bb, &mat->bb1));
2670     }
2671     xx1 = mat->xx1;
2672     bb1 = mat->bb1;
2673 
2674     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2675 
2676     if (!mat->diag) {
2677       /* this is wrong for same matrix with new nonzero values */
2678       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2679       PetscCall(MatGetDiagonal(matin, mat->diag));
2680     }
2681     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2682 
2683     if (hasop) {
2684       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2685       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2686     } else {
2687       /*
2688           These two lines are replaced by code that may be a bit faster for a good compiler
2689       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2690       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2691       */
2692       PetscCall(VecGetArray(mat->slvec1a, &sl));
2693       PetscCall(VecGetArrayRead(mat->diag, &diag));
2694       PetscCall(VecGetArrayRead(bb, &b));
2695       PetscCall(VecGetArray(xx, &x));
2696       PetscCall(VecGetLocalSize(xx, &n));
2697       if (omega == 1.0) {
2698         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2699         PetscCall(PetscLogFlops(2.0 * n));
2700       } else {
2701         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2702         PetscCall(PetscLogFlops(3.0 * n));
2703       }
2704       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2705       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2706       PetscCall(VecRestoreArrayRead(bb, &b));
2707       PetscCall(VecRestoreArray(xx, &x));
2708     }
2709 
2710     /* multiply off-diagonal portion of matrix */
2711     PetscCall(VecSet(mat->slvec1b, 0.0));
2712     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2713     PetscCall(VecGetArray(mat->slvec0, &from));
2714     PetscCall(VecGetArray(xx, &x));
2715     PetscCall(PetscArraycpy(from, x, bs * mbs));
2716     PetscCall(VecRestoreArray(mat->slvec0, &from));
2717     PetscCall(VecRestoreArray(xx, &x));
2718     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2719     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2720     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2721 
2722     /* local sweep */
2723     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2724     PetscCall(VecAXPY(xx, 1.0, xx1));
2725   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2726   PetscFunctionReturn(PETSC_SUCCESS);
2727 }
2728 
2729 /*@
2730      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2731          CSR format the local rows.
2732 
2733    Collective
2734 
2735    Input Parameters:
2736 +  comm - MPI communicator
2737 .  bs - the block size, only a block size of 1 is supported
2738 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2739 .  n - This value should be the same as the local size used in creating the
2740        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2741        calculated if N is given) For square matrices n is almost always m.
2742 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2743 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2744 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2745 .   j - column indices
2746 -   a - matrix values
2747 
2748    Output Parameter:
2749 .   mat - the matrix
2750 
2751    Level: intermediate
2752 
2753    Notes:
2754        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2755      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2756      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2757 
2758        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2759 
2760 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2761           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2762 @*/
2763 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2764 {
2765   PetscFunctionBegin;
2766   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2767   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2768   PetscCall(MatCreate(comm, mat));
2769   PetscCall(MatSetSizes(*mat, m, n, M, N));
2770   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2771   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2772   PetscFunctionReturn(PETSC_SUCCESS);
2773 }
2774 
2775 /*@C
2776    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2777 
2778    Collective
2779 
2780    Input Parameters:
2781 +  B - the matrix
2782 .  bs - the block size
2783 .  i - the indices into j for the start of each local row (starts with zero)
2784 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2785 -  v - optional values in the matrix
2786 
2787    Level: advanced
2788 
2789    Notes:
2790    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2791    and usually the numerical values as well
2792 
2793    Any entries below the diagonal are ignored
2794 
2795 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2796 @*/
2797 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2798 {
2799   PetscFunctionBegin;
2800   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2801   PetscFunctionReturn(PETSC_SUCCESS);
2802 }
2803 
2804 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2805 {
2806   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2807   PetscInt    *indx;
2808   PetscScalar *values;
2809 
2810   PetscFunctionBegin;
2811   PetscCall(MatGetSize(inmat, &m, &N));
2812   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2813     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2814     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2815     PetscInt     *bindx, rmax = a->rmax, j;
2816     PetscMPIInt   rank, size;
2817 
2818     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2819     mbs = m / bs;
2820     Nbs = N / cbs;
2821     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2822     nbs = n / cbs;
2823 
2824     PetscCall(PetscMalloc1(rmax, &bindx));
2825     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2826 
2827     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2828     PetscCallMPI(MPI_Comm_rank(comm, &size));
2829     if (rank == size - 1) {
2830       /* Check sum(nbs) = Nbs */
2831       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2832     }
2833 
2834     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2835     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2836     for (i = 0; i < mbs; i++) {
2837       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2838       nnz = nnz / bs;
2839       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2840       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2841       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2842     }
2843     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2844     PetscCall(PetscFree(bindx));
2845 
2846     PetscCall(MatCreate(comm, outmat));
2847     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2848     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2849     PetscCall(MatSetType(*outmat, MATSBAIJ));
2850     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2851     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2852     MatPreallocateEnd(dnz, onz);
2853   }
2854 
2855   /* numeric phase */
2856   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2857   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2858 
2859   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2860   for (i = 0; i < m; i++) {
2861     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2862     Ii = i + rstart;
2863     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2864     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2865   }
2866   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2867   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2868   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871