xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cf53795e253c8c9d9d0fbcef4cffd37d916c4e81)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
12 #endif
13 
14 /* This could be moved to matimpl.h */
/*
  MatPreallocateWithMats_Private - Preallocate the (not yet preallocated) matrix B
  using the combined nonzero structure of the nm matrices X[], all of which must
  share B's row layout.

  If symm[i] is PETSC_TRUE, the structural transpose of X[i] is inserted as well,
  so B can hold the full (symmetrized) pattern of a matrix stored only in its
  upper triangle (SBAIJ style).  symm may be NULL, meaning no symmetrization.

  A temporary MATPREALLOCATOR matrix collects the pattern; `fill` is forwarded
  unchanged to MatPreallocatorPreallocate().
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    /* every X[i] must distribute its rows exactly like B */
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  /* build a preallocator with the same shape/blocking as B */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      /* also insert the transposed entries when X[i] is flagged symmetric */
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}
58 
/*
  MatConvert_MPISBAIJ_Basic - Generic fallback conversion from MPISBAIJ (which
  stores only the upper triangle) to an arbitrary matrix type, by explicitly
  inserting both triangles row by row.

  For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX the target is created and, unless it
  is a dense type, preallocated symmetrically from A's pattern.  For
  MAT_REUSE_MATRIX the existing *newmat is zeroed and refilled.  With complex
  scalars and a Hermitian A, the mirrored entries are conjugated.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      /* sparse target: preallocate with A's pattern plus its transpose (symm) */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* copy values: each stored upper-triangular entry is also mirrored below the diagonal */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      /* Hermitian: the mirrored entry is the complex conjugate */
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's innards with B's, destroying the old A */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
120 
121 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
122 {
123   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
124 
125   PetscFunctionBegin;
126   PetscCall(MatStoreValues(aij->A));
127   PetscCall(MatStoreValues(aij->B));
128   PetscFunctionReturn(0);
129 }
130 
131 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
132 {
133   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
134 
135   PetscFunctionBegin;
136   PetscCall(MatRetrieveValues(aij->A));
137   PetscCall(MatRetrieveValues(aij->B));
138   PetscFunctionReturn(0);
139 }
140 
/*
  MatSetValues_SeqSBAIJ_A_Private - inline insertion of one scalar entry at local
  point indices (row,col) into the diagonal SBAIJ block A.

  Relies on the variables declared in MatSetValues_MPISBAIJ(): a, aa, ai, aj,
  aimax, ailen, bs, bs2, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i,
  nrow, rmax, N.  orow/ocol are the global indices, used only in error messages.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = aj + ai[brow]; /* block-column indices of block row brow */ \
    ap   = aa + bs2 * ai[brow]; /* values of block row brow */ \
    rmax = aimax[brow]; /* allocated blocks in this row */ \
    nrow = ailen[brow]; /* used blocks in this row */ \
    bcol = col / bs; /* block column */ \
    ridx = row % bs; /* position within the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search down to a window of <= 3, then linear scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists: update entry (ridx,cidx); blocks are column-major */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
181 
/*
  MatSetValues_SeqSBAIJ_B_Private - inline insertion of one scalar entry at local
  point indices (row,col) into the off-diagonal BAIJ block B.  Identical in
  structure to MatSetValues_SeqSBAIJ_A_Private() but operates on the b*/B
  variables declared in MatSetValues_MPISBAIJ().
*/
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = bj + bi[brow]; /* block-column indices of block row brow */ \
    ap   = ba + bs2 * bi[brow]; /* values of block row brow */ \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; /* position within the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search down to a window of <= 3, then linear scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists: update entry (ridx,cidx); blocks are column-major */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
222 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
/*
  MatSetValues_MPISBAIJ - Insert or add scalar (point-indexed) values into an
  MPISBAIJ matrix.

  Entries whose block row exceeds their block column (strictly lower triangular
  blocks) are ignored when a->ignore_ltriangular is set, otherwise an error is
  raised.  Locally owned rows are routed to the diagonal block A or the
  off-diagonal block B via the private macros above; rows owned by other ranks
  are stashed for communication during assembly (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the cached stash work arrays if this call has more columns than before */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are skipped by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue; /* negative columns are skipped by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after assembly B uses compacted column indices; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              /* new off-diagonal block column: expand B back to global numbering */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* filter out lower-triangular blocks before stashing the row */
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
342 
/*
  MatSetValuesBlocked_SeqSBAIJ_Inlined - Insert or add one bs x bs block of
  values v at block position (row,col) of the sequential SBAIJ matrix A.

  Blocks with col < row (strictly lower triangle) are ignored when
  a->ignore_ltriangular is set, otherwise an error is raised.  v is read
  row-oriented or column-oriented depending on a->roworiented; orow/ocol are
  the caller's global block indices, used only for error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* values of block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search down to a window of <= 7, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: merge v into it (blocks stored column-major) */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
422 
423 /*
424    This routine is exactly duplicated in mpibaij.c
425 */
/*
  MatSetValuesBlocked_SeqBAIJ_Inlined - Insert or add one bs x bs block of
  values v at block position (row,col) of the sequential BAIJ matrix A (used
  here for the off-diagonal part B of the parallel matrix).  Unlike the SBAIJ
  variant above, no triangular filtering is done.  orow/ocol are the caller's
  global block indices, used only for error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* values of block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  low   = 0;
  high  = nrow;
  value = v;
  /* binary search down to a window of <= 7, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: merge v into it (blocks stored column-major) */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
502 
503 /*
504     This routine could be optimized by removing the need for the block copy below and passing stride information
505   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
506 */
/*
  MatSetValuesBlocked_MPISBAIJ - Insert or add m x n blocks of size bs x bs
  (block indices im[]/in[]) into an MPISBAIJ matrix.

  Lower-triangular blocks (im[i] > in[j]) are ignored or rejected depending on
  the ignore_ltriangular option.  Each block is copied (unless the orientation
  makes a copy unnecessary) into the cached buffer baij->barray before being
  passed to the inlined sequential insert routines; off-process rows are
  stashed into mat->bstash for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the bs2-sized block copy buffer, cached on the matrix */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are skipped by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart; /* local block row */
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column owned locally: goes into the diagonal part A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are skipped by convention */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted column indices; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              /* new off-diagonal block column: expand B back to global numbering */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
603 
/*
  MatGetValues_MPISBAIJ - Retrieve the m x n values at global point indices
  idxm[]/idxn[] into v (row-major).  Only locally owned rows are supported;
  entries outside the stored pattern of the off-diagonal part come back as 0.0.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column owned locally: read from the diagonal block A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal: translate the global block column through colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* block not present in B's pattern -> value is structurally zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
641 
/*
  MatNorm_MPISBAIJ - Compute the Frobenius, 1-, or infinity norm of an MPISBAIJ
  matrix.  Because only the upper triangle is stored, off-diagonal contributions
  must be counted twice (Frobenius) or accumulated into both their row and
  column sums (1/inf norms, which coincide for a symmetric matrix).
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: the diagonal block is the whole matrix */
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] now holds ||A||_F^2; advance to the slot for B */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] holds ||B||_F^2; rewind to the start */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* off-diagonal entries are stored only once, so B's contribution is doubled */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      /* rsum accumulates |a_ij| per global column; rsum2 receives the reduction */
      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              /* NOTE(review): bcol > 0 treats the first stored block of each row as
                 the diagonal block (only counted once) — confirm against SBAIJ layout */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat: every block is off-diagonal, so it contributes to both sums */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj]; /* garray maps compact to global block columns */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
723 
724 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
725 {
726   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
727   PetscInt      nstash, reallocs;
728 
729   PetscFunctionBegin;
730   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
731 
732   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
733   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
734   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
735   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
736   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
737   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
738   PetscFunctionReturn(0);
739 }
740 
/*
  MatAssemblyEnd_MPISBAIJ - completes assembly: drains the point and block
  stashes into the local blocks, handles possible disassembly/reassembly of the
  off-diagonal block, assembles the local sequential matrices, and updates the
  collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block occupies bs2 scalars, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  /* workspace allocated lazily by MatGetRow_MPISBAIJ is released here */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
835 
836 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
837 #include <petscdraw.h>
/*
  MatView_MPISBAIJ_ASCIIorDraworSocket - views an MPISBAIJ matrix on an ASCII,
  draw, or socket viewer. Info-style ASCII formats are printed per-process and
  return early; otherwise the whole matrix is gathered onto rank 0 and viewed
  there as a sequential matrix.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    /* rank 0 owns all rows of the gathered matrix; everyone else owns none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global (point) row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* each bs-length chunk of 'a' is inserted as one column of the block */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps the compressed off-diagonal block-column index to its global index */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* on rank 0 the gathered matrix lives entirely in its diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
959 
960 /* Used for both MPIBAIJ and MPISBAIJ matrices */
961 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
962 
963 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
964 {
965   PetscBool iascii, isdraw, issocket, isbinary;
966 
967   PetscFunctionBegin;
968   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
969   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
970   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
971   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
972   if (iascii || isdraw || issocket) {
973     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
974   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
975   PetscFunctionReturn(0);
976 }
977 
/*
  MatDestroy_MPISBAIJ - releases everything owned by the parallel SBAIJ
  implementation: the two local blocks, stashes, scatter contexts, work
  vectors/arrays, and the composed functions registered at creation.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  /* communication stashes used during assembly */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* local diagonal (A) and off-diagonal (B) blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  /* scatter contexts and work vectors for MatMult and friends */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  /* MatGetRow workspace (may already be NULL) */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* remove the type name and all composed functions so the Mat can be retyped */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(0);
}
1033 
/*
  MatMult_MPISBAIJ_Hermitian - y = A*x for a Hermitian MPISBAIJ matrix; only
  the upper triangle is stored, so the lower-triangle contribution is formed
  with B's Hermitian transpose and combined via the sMvctx scatter.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part: B^H * x accumulated into the ghost segment of slvec0 */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 (the local head, ahead of the ghost segment) */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* exchange contributions: slvec1 accumulates local and remote pieces */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part: y = B*(gathered ghosts) + slvec1a */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}
1064 
1065 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1066 {
1067   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1068   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1069   PetscScalar       *from;
1070   const PetscScalar *x;
1071 
1072   PetscFunctionBegin;
1073   /* diagonal part */
1074   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1075   PetscCall(VecSet(a->slvec1b, 0.0));
1076 
1077   /* subdiagonal part */
1078   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1079 
1080   /* copy x into the vec slvec0 */
1081   PetscCall(VecGetArray(a->slvec0, &from));
1082   PetscCall(VecGetArrayRead(xx, &x));
1083 
1084   PetscCall(PetscArraycpy(from, x, bs * mbs));
1085   PetscCall(VecRestoreArray(a->slvec0, &from));
1086   PetscCall(VecRestoreArrayRead(xx, &x));
1087 
1088   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1089   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1090   /* supperdiagonal part */
1091   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1092   PetscFunctionReturn(0);
1093 }
1094 
/*
  MatMultAdd_MPISBAIJ_Hermitian - z = A*x + y for a Hermitian MPISBAIJ matrix.
  Same structure as MatMult_MPISBAIJ_Hermitian, with the diagonal-block
  product fused with the addition of y.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero       = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a <- A_diag*x + y */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part: B^H * x into the ghost segment of slvec0 */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  /* start the exchange, release xx while it is in flight, then finish */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part: z = B*(gathered ghosts) + slvec1a */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}
1125 
1126 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1127 {
1128   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1129   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1130   PetscScalar       *from, zero       = 0.0;
1131   const PetscScalar *x;
1132 
1133   PetscFunctionBegin;
1134   /* diagonal part */
1135   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1136   PetscCall(VecSet(a->slvec1b, zero));
1137 
1138   /* subdiagonal part */
1139   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1140 
1141   /* copy x into the vec slvec0 */
1142   PetscCall(VecGetArray(a->slvec0, &from));
1143   PetscCall(VecGetArrayRead(xx, &x));
1144   PetscCall(PetscArraycpy(from, x, bs * mbs));
1145   PetscCall(VecRestoreArray(a->slvec0, &from));
1146 
1147   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1148   PetscCall(VecRestoreArrayRead(xx, &x));
1149   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1150 
1151   /* supperdiagonal part */
1152   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 /*
1157   This only works correctly for square matrices where the subblock A->A is the
1158    diagonal block
1159 */
1160 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1161 {
1162   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1163 
1164   PetscFunctionBegin;
1165   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1166   PetscCall(MatGetDiagonal(a->A, v));
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1171 {
1172   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(MatScale(a->A, aa));
1176   PetscCall(MatScale(a->B, aa));
1177   PetscFunctionReturn(0);
1178 }
1179 
/*
  MatGetRow_MPISBAIJ - returns one locally owned (point) row of the matrix,
  merging the off-diagonal (B) and diagonal (A) contributions so that column
  indices come out in increasing global order. Workspace is allocated lazily
  on first use and freed at the next MatAssemblyEnd.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs; /* row start == column start for the square symmetric layout */

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* max block row length * bs2 bounds the point-row length */
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* pass NULL for the pieces the caller did not ask for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* compressed block column -> global block column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global block column precedes
         the diagonal block; layout is [B-left | A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined above while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so find imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1264 
1265 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1266 {
1267   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1268 
1269   PetscFunctionBegin;
1270   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1271   baij->getrowactive = PETSC_FALSE;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1276 {
1277   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279 
1280   PetscFunctionBegin;
1281   aA->getrow_utriangular = PETSC_TRUE;
1282   PetscFunctionReturn(0);
1283 }
1284 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285 {
1286   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1287   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1288 
1289   PetscFunctionBegin;
1290   aA->getrow_utriangular = PETSC_FALSE;
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1295 {
1296   PetscFunctionBegin;
1297   if (PetscDefined(USE_COMPLEX)) {
1298     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1299 
1300     PetscCall(MatConjugate(a->A));
1301     PetscCall(MatConjugate(a->B));
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1307 {
1308   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1309 
1310   PetscFunctionBegin;
1311   PetscCall(MatRealPart(a->A));
1312   PetscCall(MatRealPart(a->B));
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatImaginaryPart(a->A));
1322   PetscCall(MatImaginaryPart(a->B));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1327    Input: isrow       - distributed(parallel),
1328           iscol_local - locally owned (seq)
1329 */
1330 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1331 {
1332   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1333   const PetscInt *ptr1, *ptr2;
1334 
1335   PetscFunctionBegin;
1336   PetscCall(ISGetLocalSize(isrow, &sz1));
1337   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1338   if (sz1 > sz2) {
1339     *flg = PETSC_FALSE;
1340     PetscFunctionReturn(0);
1341   }
1342 
1343   PetscCall(ISGetIndices(isrow, &ptr1));
1344   PetscCall(ISGetIndices(iscol_local, &ptr2));
1345 
1346   PetscCall(PetscMalloc1(sz1, &a1));
1347   PetscCall(PetscMalloc1(sz2, &a2));
1348   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1349   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1350   PetscCall(PetscSortInt(sz1, a1));
1351   PetscCall(PetscSortInt(sz2, a2));
1352 
1353   nmatch = 0;
1354   k      = 0;
1355   for (i = 0; i < sz1; i++) {
1356     for (j = k; j < sz2; j++) {
1357       if (a1[i] == a2[j]) {
1358         k = j;
1359         nmatch++;
1360         break;
1361       }
1362     }
1363   }
1364   PetscCall(ISRestoreIndices(isrow, &ptr1));
1365   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1366   PetscCall(PetscFree(a1));
1367   PetscCall(PetscFree(a2));
1368   if (nmatch < sz1) {
1369     *flg = PETSC_FALSE;
1370   } else {
1371     *flg = PETSC_TRUE;
1372   }
1373   PetscFunctionReturn(0);
1374 }
1375 
/*
  MatCreateSubMatrix_MPISBAIJ - extracts a parallel submatrix. Because the
  SBAIJ format is symmetric, iscol must (after gathering) contain isrow and be
  sorted; the actual extraction is delegated to the MPIBAIJ implementation.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS composed on the submatrix the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the submatrix (compose takes a reference) for reuse */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
1404 
1405 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1406 {
1407   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1408 
1409   PetscFunctionBegin;
1410   PetscCall(MatZeroEntries(l->A));
1411   PetscCall(MatZeroEntries(l->B));
1412   PetscFunctionReturn(0);
1413 }
1414 
/*
  MatGetInfo_MPISBAIJ - gathers nonzero/memory statistics, summing the
  contributions of the local diagonal (A) and off-diagonal (B) blocks, then
  (for the GLOBAL flavors) reducing across the communicator.
  NOTE: MatGetInfo on B overwrites *info, so A's numbers are saved in isend[]
  before the second call.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* save A's statistics before they are overwritten by the query on B */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* accumulate B's statistics on top of A's */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1467 
/*
  MatSetOption_MPISBAIJ - handles matrix options for the parallel SBAIJ type.
  Most options are forwarded to the local blocks; the Hermitian/symmetric
  options additionally swap the mat-vec kernels in the complex case.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that are simply forwarded to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE; /* Hermitian, so no longer (numerically) symmetric */
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;    /* A^T = A for a symmetric matrix */
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    /* the SBAIJ format is symmetric by construction; turning this off is unsupported */
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  /* NOTE(review): IGNORE_ and ERROR_LOWER_TRIANGULAR both set ignore_ltriangular;
     this mirrors the SeqSBAIJ handling — confirm against the SeqSBAIJ code */
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1553 
1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1555 {
1556   PetscFunctionBegin;
1557   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1558   if (reuse == MAT_INITIAL_MATRIX) {
1559     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1560   } else if (reuse == MAT_REUSE_MATRIX) {
1561     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1567 {
1568   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1569   Mat           a = baij->A, b = baij->B;
1570   PetscInt      nv, m, n;
1571   PetscBool     flg;
1572 
1573   PetscFunctionBegin;
1574   if (ll != rr) {
1575     PetscCall(VecEqual(ll, rr, &flg));
1576     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1577   }
1578   if (!ll) PetscFunctionReturn(0);
1579 
1580   PetscCall(MatGetLocalSize(mat, &m, &n));
1581   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1582 
1583   PetscCall(VecGetLocalSize(rr, &nv));
1584   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1585 
1586   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1587 
1588   /* left diagonalscale the off-diagonal part */
1589   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1590 
1591   /* scale the diagonal part */
1592   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1593 
1594   /* right diagonalscale the off-diagonal part */
1595   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601 {
1602   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(MatSetUnfactored(a->A));
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1610 
1611 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1612 {
1613   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1614   Mat           a, b, c, d;
1615   PetscBool     flg;
1616 
1617   PetscFunctionBegin;
1618   a = matA->A;
1619   b = matA->B;
1620   c = matB->A;
1621   d = matB->B;
1622 
1623   PetscCall(MatEqual(a, c, &flg));
1624   if (flg) PetscCall(MatEqual(b, d, &flg));
1625   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1630 {
1631   PetscBool isbaij;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1635   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1636   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1637   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1638     PetscCall(MatGetRowUpperTriangular(A));
1639     PetscCall(MatCopy_Basic(A, B, str));
1640     PetscCall(MatRestoreRowUpperTriangular(A));
1641   } else {
1642     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1643     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1644 
1645     PetscCall(MatCopy(a->A, b->A, str));
1646     PetscCall(MatCopy(a->B, b->B, str));
1647   }
1648   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1649   PetscFunctionReturn(0);
1650 }
1651 
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscFunctionBegin;
  /* Fall-back setup: preallocate with default per-row estimates when the user
     never called MatMPISBAIJSetPreallocation() explicitly */
  PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
1658 
/* Y = Y + a*X. Fast BLAS path when the nonzero patterns coincide, generic row
   insertion when X's pattern is a subset of Y's, and a rebuild of Y with the
   union pattern otherwise. */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one                          = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: axpy the raw value arrays of the diagonal (SBAIJ)
       and off-diagonal (BAIJ) blocks directly */
    PetscScalar alpha = a;
    xa                = (Mat_SeqSBAIJ *)xx->A->data;
    ya                = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    /* generic path; make MatGetRow() on X return full (upper-triangular) rows */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    /* differing patterns: preallocate a new matrix B for the union pattern,
       compute B = Y + a*X there, then replace Y's guts with B's via header merge */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(0);
}
1707 
1708 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1709 {
1710   PetscInt  i;
1711   PetscBool flg;
1712 
1713   PetscFunctionBegin;
1714   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1715   for (i = 0; i < n; i++) {
1716     PetscCall(ISEqual(irow[i], icol[i], &flg));
1717     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
/* Y = Y + a*I. The shift only touches the diagonal block; ensure room exists
   for one block per row there before delegating to the generic shift. */
PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data; /* local diagonal (SBAIJ) block */

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one diagonal block per row, nothing off-process */
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* preallocated but empty: re-preallocate the diagonal block while
       preserving the user's new-nonzero handling flag, which
       MatSeqSBAIJSetPreallocation would otherwise reset */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}
1738 
1739 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1740 {
1741   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1742 
1743   PetscFunctionBegin;
1744   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1745   PetscCall(MatMissingDiagonal(a->A, missing, d));
1746   if (d) {
1747     PetscInt rstart;
1748     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1749     *d += rstart / A->rmap->bs;
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1755 {
1756   PetscFunctionBegin;
1757   *a = ((Mat_MPISBAIJ *)A->data)->A;
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 /* -------------------------------------------------------------------*/
/* Function table for MATMPISBAIJ; entries are positional and follow the slot
   numbering of struct _MatOps (the numeric markers below). Since an SBAIJ
   matrix is symmetric, the transpose multiply slots reuse the plain multiply
   kernels. NULL slots fall back to generic/default implementations. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};
1913 
1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915 {
1916   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917   PetscInt      i, mbs, Mbs;
1918   PetscMPIInt   size;
1919 
1920   PetscFunctionBegin;
1921   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1922   PetscCall(PetscLayoutSetUp(B->rmap));
1923   PetscCall(PetscLayoutSetUp(B->cmap));
1924   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1925   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1926   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1927 
1928   mbs = B->rmap->n / bs;
1929   Mbs = B->rmap->N / bs;
1930   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1931 
1932   B->rmap->bs = bs;
1933   b->bs2      = bs * bs;
1934   b->mbs      = mbs;
1935   b->Mbs      = Mbs;
1936   b->nbs      = B->cmap->n / bs;
1937   b->Nbs      = B->cmap->N / bs;
1938 
1939   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1940   b->rstartbs = B->rmap->rstart / bs;
1941   b->rendbs   = B->rmap->rend / bs;
1942 
1943   b->cstartbs = B->cmap->rstart / bs;
1944   b->cendbs   = B->cmap->rend / bs;
1945 
1946 #if defined(PETSC_USE_CTABLE)
1947   PetscCall(PetscTableDestroy(&b->colmap));
1948 #else
1949   PetscCall(PetscFree(b->colmap));
1950 #endif
1951   PetscCall(PetscFree(b->garray));
1952   PetscCall(VecDestroy(&b->lvec));
1953   PetscCall(VecScatterDestroy(&b->Mvctx));
1954   PetscCall(VecDestroy(&b->slvec0));
1955   PetscCall(VecDestroy(&b->slvec0b));
1956   PetscCall(VecDestroy(&b->slvec1));
1957   PetscCall(VecDestroy(&b->slvec1a));
1958   PetscCall(VecDestroy(&b->slvec1b));
1959   PetscCall(VecScatterDestroy(&b->sMvctx));
1960 
1961   /* Because the B will have been resized we simply destroy it and create a new one each time */
1962   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1963   PetscCall(MatDestroy(&b->B));
1964   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1965   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1966   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1967 
1968   if (!B->preallocated) {
1969     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1970     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1971     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1972     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1973   }
1974 
1975   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1976   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1977 
1978   B->preallocated  = PETSC_TRUE;
1979   B->was_assembled = PETSC_FALSE;
1980   B->assembled     = PETSC_FALSE;
1981   PetscFunctionReturn(0);
1982 }
1983 
/* Preallocate and fill an MPISBAIJ matrix from local block-CSR arrays
   (ii = block-row offsets, jj = global block columns, V = optional values,
   stored block-by-block). Lower-triangular entries in the input are ignored. */
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cend;
  PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;      /* number of local block rows */
  rstart = B->rmap->rstart / bs; /* first global block row owned here */
  cend   = B->cmap->rend / bs;   /* end of this process's block-column range */

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i]; /* total blocks in this block row of the input */
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* bd = leading entries strictly below the diagonal (not stored by SBAIJ) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d = upper-triangular entries that fall inside the diagonal portion (< cend) */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd; /* remainder goes to the off-diagonal portion */
    nz       = nz - bd;     /* blocks actually inserted for this row */
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* without user values, insert zeros so the pattern is still established */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries are local by construction; skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2059 
2060 /*MC
2061    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063    the matrix is stored.
2064 
2065    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2066    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2067 
2068    Options Database Keys:
2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2070 
2071    Note:
2072      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2074 
2075    Level: beginner
2076 
2077 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2078 M*/
2079 
/* Constructor for the MATMPISBAIJ type: allocates the implementation struct,
   installs the function table, initializes all cached state to empty, and
   registers the composed (dynamic) methods and conversions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* composed methods looked up by name at runtime (preallocation, conversions, ...) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* SBAIJ matrices are symmetric by construction; with complex scalars they
     are symmetric but not Hermitian unless MAT_HERMITIAN is set later */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2180 
2181 /*MC
2182    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2183 
2184    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2185    and `MATMPISBAIJ` otherwise.
2186 
2187    Options Database Key:
2188 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2189 
2190   Level: beginner
2191 
2192 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2193 M*/
2194 
2195 /*@C
2196    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2197    the user should preallocate the matrix storage by setting the parameters
2198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199    performance can be increased by more than a factor of 50.
2200 
2201    Collective
2202 
2203    Input Parameters:
2204 +  B - the matrix
2205 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2206           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2207 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2208            submatrix  (same for all local rows)
2209 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
2211            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2212            for the diagonal entry and set a value even if it is zero.
2213 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2214            submatrix (same for all local rows).
2215 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2216            off-diagonal portion of the local submatrix that is right of the diagonal
2217            (possibly different for each block row) or NULL.
2218 
2219    Options Database Keys:
2220 +   -mat_no_unroll - uses code that does not unroll the loops in the
2221                      block calculations (much slower)
2222 -   -mat_block_size - size of the blocks to use
2223 
2224    Notes:
2225 
2226    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2227    than it must be used on all processors that share the object for that argument.
2228 
2229    If the *_nnz parameter is given then the *_nz parameter is ignored
2230 
2231    Storage Information:
2232    For a square global matrix we define each processor's diagonal portion
2233    to be its local rows and the corresponding columns (a square submatrix);
2234    each processor's off-diagonal portion encompasses the remainder of the
2235    local matrix (a rectangular submatrix).
2236 
2237    The user can specify preallocated storage for the diagonal part of
2238    the local submatrix with either d_nz or d_nnz (not both).  Set
2239    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2240    memory allocation.  Likewise, specify preallocated storage for the
2241    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2242 
2243    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2244    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2245    You can also run with the option -info and look for messages with the string
2246    malloc in them to see if additional memory allocation was needed.
2247 
2248    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2249    the figure below we depict these three local rows and all columns (0-11).
2250 
2251 .vb
2252            0 1 2 3 4 5 6 7 8 9 10 11
2253           --------------------------
2254    row 3  |. . . d d d o o o o  o  o
2255    row 4  |. . . d d d o o o o  o  o
2256    row 5  |. . . d d d o o o o  o  o
2257           --------------------------
2258 .ve
2259 
2260    Thus, any entries in the d locations are stored in the d (diagonal)
2261    submatrix, and any entries in the o locations are stored in the
2262    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2263    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2264 
2265    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2266    plus the diagonal part of the d matrix,
2267    and o_nz should indicate the number of block nonzeros per row in the o matrix
2268 
2269    In general, for PDE problems in which most nonzeros are near the diagonal,
2270    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2271    or you will get TERRIBLE performance; see the users' manual chapter on
2272    matrices.
2273 
2274    Level: intermediate
2275 
2276 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2277 @*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the (collective) arguments before dispatching */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation; silently a no-op if B is not an MPISBAIJ matrix */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
2287 
2288 /*@C
2289    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2290    (block compressed row).  For good matrix assembly performance
2291    the user should preallocate the matrix storage by setting the parameters
2292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293    performance can be increased by more than a factor of 50.
2294 
2295    Collective
2296 
2297    Input Parameters:
2298 +  comm - MPI communicator
2299 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2300           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2301 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302            This value should be the same as the local size used in creating the
2303            y vector for the matrix-vector product y = Ax.
2304 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2305            This value should be the same as the local size used in creating the
2306            x vector for the matrix-vector product y = Ax.
2307 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2308 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2309 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2310            submatrix  (same for all local rows)
2311 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
2314            If you plan to factor the matrix you must leave room for the diagonal entry and
2315            set its value even if it is zero.
2316 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2317            submatrix (same for all local rows).
2318 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2319            off-diagonal portion of the local submatrix (possibly different for
2320            each block row) or NULL.
2321 
2322    Output Parameter:
2323 .  A - the matrix
2324 
2325    Options Database Keys:
2326 +   -mat_no_unroll - uses code that does not unroll the loops in the
2327                      block calculations (much slower)
2328 .   -mat_block_size - size of the blocks to use
2329 -   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
2331 
2332    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2333    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2334    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2335 
2336    Notes:
2337    The number of rows and columns must be divisible by blocksize.
2338    This matrix type does not support complex Hermitian operation.
2339 
2340    The user MUST specify either the local or global matrix dimensions
2341    (possibly both).
2342 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2345 
2346    If the *_nnz parameter is given then the *_nz parameter is ignored
2347 
2348    Storage Information:
2349    For a square global matrix we define each processor's diagonal portion
2350    to be its local rows and the corresponding columns (a square submatrix);
2351    each processor's off-diagonal portion encompasses the remainder of the
2352    local matrix (a rectangular submatrix).
2353 
2354    The user can specify preallocated storage for the diagonal part of
2355    the local submatrix with either d_nz or d_nnz (not both).  Set
2356    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2357    memory allocation.  Likewise, specify preallocated storage for the
2358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2359 
2360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361    the figure below we depict these three local rows and all columns (0-11).
2362 
2363 .vb
2364            0 1 2 3 4 5 6 7 8 9 10 11
2365           --------------------------
2366    row 3  |. . . d d d o o o o  o  o
2367    row 4  |. . . d d d o o o o  o  o
2368    row 5  |. . . d d d o o o o  o  o
2369           --------------------------
2370 .ve
2371 
2372    Thus, any entries in the d locations are stored in the d (diagonal)
2373    submatrix, and any entries in the o locations are stored in the
2374    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2376 
2377    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2378    plus the diagonal part of the d matrix,
2379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2382    or you will get TERRIBLE performance; see the users' manual chapter on
2383    matrices.
2384 
2385    Level: intermediate
2386 
2387 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2388 @*/
2389 
2390 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2391 {
2392   PetscMPIInt size;
2393 
2394   PetscFunctionBegin;
2395   PetscCall(MatCreate(comm, A));
2396   PetscCall(MatSetSizes(*A, m, n, M, N));
2397   PetscCallMPI(MPI_Comm_size(comm, &size));
2398   if (size > 1) {
2399     PetscCall(MatSetType(*A, MATMPISBAIJ));
2400     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2401   } else {
2402     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2403     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2404   }
2405   PetscFunctionReturn(0);
2406 }
2407 
/* Duplicates an MPISBAIJ matrix: copies the parallel layout, communication
   structures, and (depending on cpvalues) the numerical values of matin
   into a newly created matrix returned in newmat. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  /* create the new matrix with the same sizes, type and layouts as matin */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the scalar bookkeeping fields of the parallel data structure */
  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() scratch; rebuilt on demand in the copy */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the flags but start the copy with an empty table */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  /* colmap translates global block columns into local block columns of B */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray lists the global block columns present in the off-diagonal block B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec1a/slvec1b (and slvec0b) are views aliasing the leading bs*mbs and
     trailing nt - bs*mbs entries of slvec1 (resp. slvec0); no data is copied */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  /* duplicate the diagonal block A and off-diagonal block B (values copied per cpvalues) */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2499 
2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2502 
2503 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2504 {
2505   PetscBool isbinary;
2506 
2507   PetscFunctionBegin;
2508   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2509   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2510   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2511   PetscFunctionReturn(0);
2512 }
2513 
/* Computes v[row] = max_col |A[row,col]| for the locally owned (point) rows.
   Returning idx[] (the column of each maximum) is not supported.

   Because only the upper triangle of the symmetric matrix is stored, the
   entries of the off-diagonal block B also bound rows owned by higher-ranked
   processes (their mirrored, transposed locations).  Those magnitudes are
   accumulated in `work` indexed by global column and shipped to the owning
   processes with point-to-point messages below. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* row maxima of the diagonal block A; the SeqSBAIJ implementation accounts for its own symmetry */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a; /* raw CSR-of-blocks arrays of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) { /* the last rank stores no upper-triangular off-diagonal entries */
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            /* update both this row's max and the mirrored row's max (held in work) */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) { /* one message from each lower rank; arrival order is irrelevant since only the max survives */
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}
2596 
/* SOR/SSOR relaxation for MPISBAIJ matrices (point-block size 1 only).
   Supports SOR_APPLY_UPPER, local symmetric sweeps, single local
   forward/backward sweeps with zero initial guess, and Eisenstat's trick;
   all other MatSORType combinations error out. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the diagonal block's SOR */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution when x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(VecSet(mat->slvec1b, 0.0));

      /* scatter adds the -x and -B^T x pieces into slvec1; arrays restored while the scatter is in flight */
      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward sweep, a diagonal correction, then one forward sweep */
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    /* work vectors are cached on the matrix across calls */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      /* NOTE(review): the product lands in bb1 but the scaled update is applied to
         slvec1a, which the else-branch writes instead — looks inconsistent; confirm
         against upstream before relying on the MATOP_MULT_DIAGONAL_BLOCK path */
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* scale == -1, so b + scale*diag*x simplifies to b - diag*x */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b, 0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2726 
2727 /*@
2728      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2729          CSR format the local rows.
2730 
2731    Collective
2732 
2733    Input Parameters:
2734 +  comm - MPI communicator
2735 .  bs - the block size, only a block size of 1 is supported
2736 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2737 .  n - This value should be the same as the local size used in creating the
2738        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2739        calculated if N is given) For square matrices n is almost always m.
2740 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2741 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2742 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2743 .   j - column indices
2744 -   a - matrix values
2745 
2746    Output Parameter:
2747 .   mat - the matrix
2748 
2749    Level: intermediate
2750 
2751    Notes:
2752        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2753      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2754      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2755 
2756        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2757 
2758 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2759           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2760 @*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* the CSR row-offset array must be 0-based */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  /* copies the CSR triple (i,j,a) into the internal SBAIJ storage */
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(0);
}
2772 
2773 /*@C
2774    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2775 
2776    Collective
2777 
2778    Input Parameters:
2779 +  B - the matrix
2780 .  bs - the block size
2781 .  i - the indices into j for the start of each local row (starts with zero)
2782 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2783 -  v - optional values in the matrix
2784 
2785    Level: advanced
2786 
2787    Notes:
2788    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2789    and usually the numerical values as well
2790 
2791    Any entries below the diagonal are ignored
2792 
2793 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2794 @*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the MPISBAIJ-specific implementation; silently a no-op for other matrix types */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2801 
2802 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2803 {
2804   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2805   PetscInt    *indx;
2806   PetscScalar *values;
2807 
2808   PetscFunctionBegin;
2809   PetscCall(MatGetSize(inmat, &m, &N));
2810   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2811     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2812     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2813     PetscInt     *bindx, rmax = a->rmax, j;
2814     PetscMPIInt   rank, size;
2815 
2816     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2817     mbs = m / bs;
2818     Nbs = N / cbs;
2819     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2820     nbs = n / cbs;
2821 
2822     PetscCall(PetscMalloc1(rmax, &bindx));
2823     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2824 
2825     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2826     PetscCallMPI(MPI_Comm_rank(comm, &size));
2827     if (rank == size - 1) {
2828       /* Check sum(nbs) = Nbs */
2829       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2830     }
2831 
2832     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2833     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2834     for (i = 0; i < mbs; i++) {
2835       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2836       nnz = nnz / bs;
2837       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2838       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2839       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2840     }
2841     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2842     PetscCall(PetscFree(bindx));
2843 
2844     PetscCall(MatCreate(comm, outmat));
2845     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2846     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2847     PetscCall(MatSetType(*outmat, MATSBAIJ));
2848     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2849     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2850     MatPreallocateEnd(dnz, onz);
2851   }
2852 
2853   /* numeric phase */
2854   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2855   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2856 
2857   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2858   for (i = 0; i < m; i++) {
2859     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2860     Ii = i + rstart;
2861     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2862     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2863   }
2864   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2865   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2866   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2867   PetscFunctionReturn(0);
2868 }
2869