/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}
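
/*
   A minimal sketch of the MATPREALLOCATOR pattern used above, assuming a
   caller-provided matrix B with local sizes m x n and global sizes M x N:

     Mat preallocator;
     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
     PetscCall(MatSetSizes(preallocator, m, n, M, N));
     PetscCall(MatSetUp(preallocator));
     ... insert the sparsity pattern with MatSetValues() ...
     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_TRUE, B));
     PetscCall(MatDestroy(&preallocator));
*/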

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
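
/*
   MatConvert_MPISBAIJ_Basic() is normally reached through the public
   MatConvert() interface; a minimal sketch that expands the symmetric storage
   of an assembled MATMPISBAIJ matrix A into a full MATMPIAIJ matrix:

     Mat B;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
     ... use B ...
     PetscCall(MatDestroy(&B));
*/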

PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

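/* The two macros below insert one scalar value into the local diagonal block A
   (stored as SeqSBAIJ, upper triangle only) or the local off-diagonal block B
   (stored as SeqBAIJ). Both locate the target block column with a short binary
   search followed by a linear scan, and reallocate the row through
   MatSeqXAIJReallocateAIJ() when a new nonzero block must be inserted. */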
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
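
/*
   A minimal sketch of the workaround named in the error message below; once the
   option is set, lower-triangular entries are silently dropped instead of
   generating an error:

     PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
     PetscCall(MatSetValues(mat, 1, &i, 1, &j, &v, INSERT_VALUES));
*/
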
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* A copy is not required when n == 1 (row-oriented) or m == 1 (column-oriented) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
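
/*
   A minimal sketch of a call that reaches the routine above, assuming global
   block indices i <= j and a caller-filled, row-oriented array "block" holding
   bs*bs values (here bs = 3):

     PetscScalar block[9];
     ... fill block ...
     PetscCall(MatSetValuesBlocked(mat, 1, &i, 1, &j, block, INSERT_VALUES));
*/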

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] = ||A||_F^2 */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] = ||B||_F^2; reset the pointer */
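      /* The local contributions are combined below as
             ||M||_F^2 = sum_p (||A_p||_F^2 + 2*||B_p||_F^2)
         since each stored entry of the off-diagonal part B represents two entries
         of the symmetric matrix (M[i][j] and M[j][i]), while MatNorm() on the
         SeqSBAIJ block A already accounts for the symmetry within that block */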
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
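
/*
   Assembly is driven through the public interface; a minimal sketch of the
   standard sequence after all (block) values have been set:

     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/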

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
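
/*
   In outline, both MatMult implementations above compute y = M*x for the full
   symmetric operator while storing only the upper triangle:
     1) slvec1a = A_diag * x_local and slvec0b = B^T * x_local (B^H for the
        Hermitian variant) are formed locally;
     2) the single scatter sMvctx simultaneously sends the B^T (B^H) products to
        the processes owning the corresponding rows and gathers the ghost values
        of x into slvec1b;
     3) y = B * slvec1b + slvec1a adds the superdiagonal contribution.
*/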

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero       = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero       = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
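
/*
   A minimal sketch of row access for this format: the caller must bracket
   MatGetRow()/MatRestoreRow() with the upper-triangular accessors:

     PetscCall(MatGetRowUpperTriangular(mat));
     PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
     ... examine the row ...
     PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));
     PetscCall(MatRestoreRowUpperTriangular(mat));
*/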
1263 
1264 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1265 {
1266   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1267 
1268   PetscFunctionBegin;
1269   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1270   baij->getrowactive = PETSC_FALSE;
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1275 {
1276   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1277   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1278 
1279   PetscFunctionBegin;
1280   aA->getrow_utriangular = PETSC_TRUE;
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1284 {
1285   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1286   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1287 
1288   PetscFunctionBegin;
1289   aA->getrow_utriangular = PETSC_FALSE;
1290   PetscFunctionReturn(PETSC_SUCCESS);
1291 }
1292 
1293 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1294 {
1295   PetscFunctionBegin;
1296   if (PetscDefined(USE_COMPLEX)) {
1297     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1298 
1299     PetscCall(MatConjugate(a->A));
1300     PetscCall(MatConjugate(a->B));
1301   }
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1306 {
1307   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1308 
1309   PetscFunctionBegin;
1310   PetscCall(MatRealPart(a->A));
1311   PetscCall(MatRealPart(a->B));
1312   PetscFunctionReturn(PETSC_SUCCESS);
1313 }
1314 
1315 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1316 {
1317   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1318 
1319   PetscFunctionBegin;
1320   PetscCall(MatImaginaryPart(a->A));
1321   PetscCall(MatImaginaryPart(a->B));
1322   PetscFunctionReturn(PETSC_SUCCESS);
1323 }
1324 
1325 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1326    Input: isrow       - distributed(parallel),
1327           iscol_local - locally owned (seq)
1328 */
1329 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1330 {
1331   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1332   const PetscInt *ptr1, *ptr2;
1333 
1334   PetscFunctionBegin;
1335   PetscCall(ISGetLocalSize(isrow, &sz1));
1336   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1337   if (sz1 > sz2) {
1338     *flg = PETSC_FALSE;
1339     PetscFunctionReturn(PETSC_SUCCESS);
1340   }
1341 
1342   PetscCall(ISGetIndices(isrow, &ptr1));
1343   PetscCall(ISGetIndices(iscol_local, &ptr2));
1344 
1345   PetscCall(PetscMalloc1(sz1, &a1));
1346   PetscCall(PetscMalloc1(sz2, &a2));
1347   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1348   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1349   PetscCall(PetscSortInt(sz1, a1));
1350   PetscCall(PetscSortInt(sz2, a2));
1351 
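     /* both copies are sorted, so each search for a1[i] can resume from the position k
        of the previous match instead of rescanning a2 from the beginning */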
1352   nmatch = 0;
1353   k      = 0;
1354   for (i = 0; i < sz1; i++) {
1355     for (j = k; j < sz2; j++) {
1356       if (a1[i] == a2[j]) {
1357         k = j;
1358         nmatch++;
1359         break;
1360       }
1361     }
1362   }
1363   PetscCall(ISRestoreIndices(isrow, &ptr1));
1364   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1365   PetscCall(PetscFree(a1));
1366   PetscCall(PetscFree(a2));
1367   if (nmatch < sz1) {
1368     *flg = PETSC_FALSE;
1369   } else {
1370     *flg = PETSC_TRUE;
1371   }
1372   PetscFunctionReturn(PETSC_SUCCESS);
1373 }
1374 
1375 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1376 {
1377   IS        iscol_local;
1378   PetscInt  csize;
1379   PetscBool isequal;
1380 
1381   PetscFunctionBegin;
1382   PetscCall(ISGetLocalSize(iscol, &csize));
1383   if (call == MAT_REUSE_MATRIX) {
1384     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1385     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1386   } else {
1387     PetscBool issorted;
1388 
1389     PetscCall(ISAllGather(iscol, &iscol_local));
1390     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1391     PetscCall(ISSorted(iscol_local, &issorted));
1392     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1393   }
1394 
1395   /* now call MatCreateSubMatrix_MPIBAIJ() */
1396   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1397   if (call == MAT_INITIAL_MATRIX) {
1398     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1399     PetscCall(ISDestroy(&iscol_local));
1400   }
1401   PetscFunctionReturn(PETSC_SUCCESS);
1402 }
1403 
1404 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1405 {
1406   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1407 
1408   PetscFunctionBegin;
1409   PetscCall(MatZeroEntries(l->A));
1410   PetscCall(MatZeroEntries(l->B));
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
1414 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1415 {
1416   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1417   Mat            A = a->A, B = a->B;
1418   PetscLogDouble isend[5], irecv[5];
1419 
1420   PetscFunctionBegin;
1421   info->block_size = (PetscReal)matin->rmap->bs;
1422 
1423   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1424 
1425   isend[0] = info->nz_used;
1426   isend[1] = info->nz_allocated;
1427   isend[2] = info->nz_unneeded;
1428   isend[3] = info->memory;
1429   isend[4] = info->mallocs;
1430 
1431   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1432 
1433   isend[0] += info->nz_used;
1434   isend[1] += info->nz_allocated;
1435   isend[2] += info->nz_unneeded;
1436   isend[3] += info->memory;
1437   isend[4] += info->mallocs;
1438   if (flag == MAT_LOCAL) {
1439     info->nz_used      = isend[0];
1440     info->nz_allocated = isend[1];
1441     info->nz_unneeded  = isend[2];
1442     info->memory       = isend[3];
1443     info->mallocs      = isend[4];
1444   } else if (flag == MAT_GLOBAL_MAX) {
1445     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1446 
1447     info->nz_used      = irecv[0];
1448     info->nz_allocated = irecv[1];
1449     info->nz_unneeded  = irecv[2];
1450     info->memory       = irecv[3];
1451     info->mallocs      = irecv[4];
1452   } else if (flag == MAT_GLOBAL_SUM) {
1453     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1454 
1455     info->nz_used      = irecv[0];
1456     info->nz_allocated = irecv[1];
1457     info->nz_unneeded  = irecv[2];
1458     info->memory       = irecv[3];
1459     info->mallocs      = irecv[4];
1460   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1461   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1462   info->fill_ratio_needed = 0;
1463   info->factor_mallocs    = 0;
1464   PetscFunctionReturn(PETSC_SUCCESS);
1465 }
1466 
1467 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1468 {
1469   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1470   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1471 
1472   PetscFunctionBegin;
1473   switch (op) {
1474   case MAT_NEW_NONZERO_LOCATIONS:
1475   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1476   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1477   case MAT_KEEP_NONZERO_PATTERN:
1478   case MAT_SUBMAT_SINGLEIS:
1479   case MAT_NEW_NONZERO_LOCATION_ERR:
1480     MatCheckPreallocated(A, 1);
1481     PetscCall(MatSetOption(a->A, op, flg));
1482     PetscCall(MatSetOption(a->B, op, flg));
1483     break;
1484   case MAT_ROW_ORIENTED:
1485     MatCheckPreallocated(A, 1);
1486     a->roworiented = flg;
1487 
1488     PetscCall(MatSetOption(a->A, op, flg));
1489     PetscCall(MatSetOption(a->B, op, flg));
1490     break;
1491   case MAT_FORCE_DIAGONAL_ENTRIES:
1492   case MAT_SORTED_FULL:
1493     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1494     break;
1495   case MAT_IGNORE_OFF_PROC_ENTRIES:
1496     a->donotstash = flg;
1497     break;
1498   case MAT_USE_HASH_TABLE:
1499     a->ht_flag = flg;
1500     break;
1501   case MAT_HERMITIAN:
1502     MatCheckPreallocated(A, 1);
1503     PetscCall(MatSetOption(a->A, op, flg));
1504 #if defined(PETSC_USE_COMPLEX)
1505     if (flg) { /* need different mat-vec ops */
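           /* a Hermitian matrix has A^T = conj(A) != A in general, so the symmetric
              kernels can no longer stand in for the transpose operations */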
1506       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1507       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1508       A->ops->multtranspose    = NULL;
1509       A->ops->multtransposeadd = NULL;
1510       A->symmetric             = PETSC_BOOL3_FALSE;
1511     }
1512 #endif
1513     break;
1514   case MAT_SPD:
1515   case MAT_SYMMETRIC:
1516     MatCheckPreallocated(A, 1);
1517     PetscCall(MatSetOption(a->A, op, flg));
1518 #if defined(PETSC_USE_COMPLEX)
1519     if (flg) { /* restore to use default mat-vec ops */
1520       A->ops->mult             = MatMult_MPISBAIJ;
1521       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1522       A->ops->multtranspose    = MatMult_MPISBAIJ;
1523       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1524     }
1525 #endif
1526     break;
1527   case MAT_STRUCTURALLY_SYMMETRIC:
1528     MatCheckPreallocated(A, 1);
1529     PetscCall(MatSetOption(a->A, op, flg));
1530     break;
1531   case MAT_SYMMETRY_ETERNAL:
1532   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1533     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1534     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1535     break;
1536   case MAT_SPD_ETERNAL:
1537     break;
1538   case MAT_IGNORE_LOWER_TRIANGULAR:
1539     aA->ignore_ltriangular = flg;
1540     break;
1541   case MAT_ERROR_LOWER_TRIANGULAR:
1542     aA->ignore_ltriangular = flg;
1543     break;
1544   case MAT_GETROW_UPPERTRIANGULAR:
1545     aA->getrow_utriangular = flg;
1546     break;
1547   default:
1548     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unknown option %d", (int)op);
1549   }
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552 
1553 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1554 {
1555   PetscFunctionBegin;
1556   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1557   if (reuse == MAT_INITIAL_MATRIX) {
1558     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1559   } else if (reuse == MAT_REUSE_MATRIX) {
1560     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1561   }
1562   PetscFunctionReturn(PETSC_SUCCESS);
1563 }
1564 
1565 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1566 {
1567   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1568   Mat           a = baij->A, b = baij->B;
1569   PetscInt      nv, m, n;
1570   PetscBool     flg;
1571 
1572   PetscFunctionBegin;
1573   if (ll != rr) {
1574     PetscCall(VecEqual(ll, rr, &flg));
1575     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1576   }
1577   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1578 
1579   PetscCall(MatGetLocalSize(mat, &m, &n));
1580   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1581 
1582   PetscCall(VecGetLocalSize(rr, &nv));
1583   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1584 
1585   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1586 
1587   /* left diagonalscale the off-diagonal part */
1588   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1589 
1590   /* scale the diagonal part */
1591   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1592 
1593   /* right diagonalscale the off-diagonal part */
1594   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1595   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1600 {
1601   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1602 
1603   PetscFunctionBegin;
1604   PetscCall(MatSetUnfactored(a->A));
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1609 
1610 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1611 {
1612   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1613   Mat           a, b, c, d;
1614   PetscBool     flg;
1615 
1616   PetscFunctionBegin;
1617   a = matA->A;
1618   b = matA->B;
1619   c = matB->A;
1620   d = matB->B;
1621 
1622   PetscCall(MatEqual(a, c, &flg));
1623   if (flg) PetscCall(MatEqual(b, d, &flg));
1624   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1625   PetscFunctionReturn(PETSC_SUCCESS);
1626 }
1627 
1628 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1629 {
1630   PetscBool isbaij;
1631 
1632   PetscFunctionBegin;
1633   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1634   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1635   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1636   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1637     PetscCall(MatGetRowUpperTriangular(A));
1638     PetscCall(MatCopy_Basic(A, B, str));
1639     PetscCall(MatRestoreRowUpperTriangular(A));
1640   } else {
1641     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1642     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1643 
1644     PetscCall(MatCopy(a->A, b->A, str));
1645     PetscCall(MatCopy(a->B, b->B, str));
1646   }
1647   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1648   PetscFunctionReturn(PETSC_SUCCESS);
1649 }
1650 
1651 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1652 {
1653   PetscFunctionBegin;
1654   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659 {
1660   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1661   PetscBLASInt  bnz, one                          = 1;
1662   Mat_SeqSBAIJ *xa, *ya;
1663   Mat_SeqBAIJ  *xb, *yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1669     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1670     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1672     xb = (Mat_SeqBAIJ *)xx->B->data;
1673     yb = (Mat_SeqBAIJ *)yy->B->data;
1674     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1676     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1678     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1679     PetscCall(MatAXPY_Basic(Y, a, X, str));
1680     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1681   } else {
1682     Mat       B;
1683     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1684     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1685     PetscCall(MatGetRowUpperTriangular(X));
1686     PetscCall(MatGetRowUpperTriangular(Y));
1687     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1688     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1689     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1690     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1691     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1692     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1693     PetscCall(MatSetType(B, MATMPISBAIJ));
1694     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1695     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1696     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1697     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1698     PetscCall(MatHeaderMerge(Y, &B));
1699     PetscCall(PetscFree(nnz_d));
1700     PetscCall(PetscFree(nnz_o));
1701     PetscCall(MatRestoreRowUpperTriangular(X));
1702     PetscCall(MatRestoreRowUpperTriangular(Y));
1703   }
1704   PetscFunctionReturn(PETSC_SUCCESS);
1705 }
1706 
1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708 {
1709   PetscInt  i;
1710   PetscBool flg;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714   for (i = 0; i < n; i++) {
1715     PetscCall(ISEqual(irow[i], icol[i], &flg));
1716     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1717   }
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722 {
1723   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1724   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1725 
1726   PetscFunctionBegin;
1727   if (!Y->preallocated) {
1728     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1729   } else if (!aij->nz) {
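         /* the diagonal block has no stored nonzeros yet: preallocate room for the diagonal
            blocks, then restore the user's new-nonzero policy, which preallocation resets */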
1730     PetscInt nonew = aij->nonew;
1731     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732     aij->nonew = nonew;
1733   }
1734   PetscCall(MatShift_Basic(Y, a));
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739 {
1740   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1744   PetscCall(MatMissingDiagonal(a->A, missing, d));
1745   if (d) {
1746     PetscInt rstart;
1747     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1748     *d += rstart / A->rmap->bs;
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754 {
1755   PetscFunctionBegin;
1756   *a = ((Mat_MPISBAIJ *)A->data)->A;
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 /* -------------------------------------------------------------------*/
1761 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1762                                        MatGetRow_MPISBAIJ,
1763                                        MatRestoreRow_MPISBAIJ,
1764                                        MatMult_MPISBAIJ,
1765                                        /*  4*/ MatMultAdd_MPISBAIJ,
1766                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1767                                        MatMultAdd_MPISBAIJ,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        /* 10*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatSOR_MPISBAIJ,
1775                                        MatTranspose_MPISBAIJ,
1776                                        /* 15*/ MatGetInfo_MPISBAIJ,
1777                                        MatEqual_MPISBAIJ,
1778                                        MatGetDiagonal_MPISBAIJ,
1779                                        MatDiagonalScale_MPISBAIJ,
1780                                        MatNorm_MPISBAIJ,
1781                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1782                                        MatAssemblyEnd_MPISBAIJ,
1783                                        MatSetOption_MPISBAIJ,
1784                                        MatZeroEntries_MPISBAIJ,
1785                                        /* 24*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        /* 29*/ MatSetUp_MPISBAIJ,
1791                                        NULL,
1792                                        NULL,
1793                                        MatGetDiagonalBlock_MPISBAIJ,
1794                                        NULL,
1795                                        /* 34*/ MatDuplicate_MPISBAIJ,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        /* 39*/ MatAXPY_MPISBAIJ,
1801                                        MatCreateSubMatrices_MPISBAIJ,
1802                                        MatIncreaseOverlap_MPISBAIJ,
1803                                        MatGetValues_MPISBAIJ,
1804                                        MatCopy_MPISBAIJ,
1805                                        /* 44*/ NULL,
1806                                        MatScale_MPISBAIJ,
1807                                        MatShift_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                        /* 49*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        /* 54*/ NULL,
1816                                        NULL,
1817                                        MatSetUnfactored_MPISBAIJ,
1818                                        NULL,
1819                                        MatSetValuesBlocked_MPISBAIJ,
1820                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        /* 64*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1831                                        NULL,
1832                                        MatConvert_MPISBAIJ_Basic,
1833                                        NULL,
1834                                        NULL,
1835                                        /* 74*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /* 79*/ NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        MatLoad_MPISBAIJ,
1845                                        /* 84*/ NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /* 89*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        /* 94*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        /* 99*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        MatConjugate_MPISBAIJ,
1864                                        NULL,
1865                                        /*104*/ NULL,
1866                                        MatRealPart_MPISBAIJ,
1867                                        MatImaginaryPart_MPISBAIJ,
1868                                        MatGetRowUpperTriangular_MPISBAIJ,
1869                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1870                                        /*109*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        MatMissingDiagonal_MPISBAIJ,
1875                                        /*114*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        /*119*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        /*124*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        /*129*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        /*134*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        /*139*/ MatSetBlockSizes_Default,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        /*150*/ NULL,
1912                                        NULL};
1913 
1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915 {
1916   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917   PetscInt      i, mbs, Mbs;
1918   PetscMPIInt   size;
1919 
1920   PetscFunctionBegin;
1921   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1922   PetscCall(PetscLayoutSetUp(B->rmap));
1923   PetscCall(PetscLayoutSetUp(B->cmap));
1924   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1925   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1926   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1927 
1928   mbs = B->rmap->n / bs;
1929   Mbs = B->rmap->N / bs;
1930   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1931 
1932   B->rmap->bs = bs;
1933   b->bs2      = bs * bs;
1934   b->mbs      = mbs;
1935   b->Mbs      = Mbs;
1936   b->nbs      = B->cmap->n / bs;
1937   b->Nbs      = B->cmap->N / bs;
1938 
1939   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1940   b->rstartbs = B->rmap->rstart / bs;
1941   b->rendbs   = B->rmap->rend / bs;
1942 
1943   b->cstartbs = B->cmap->rstart / bs;
1944   b->cendbs   = B->cmap->rend / bs;
1945 
1946 #if defined(PETSC_USE_CTABLE)
1947   PetscCall(PetscHMapIDestroy(&b->colmap));
1948 #else
1949   PetscCall(PetscFree(b->colmap));
1950 #endif
1951   PetscCall(PetscFree(b->garray));
1952   PetscCall(VecDestroy(&b->lvec));
1953   PetscCall(VecScatterDestroy(&b->Mvctx));
1954   PetscCall(VecDestroy(&b->slvec0));
1955   PetscCall(VecDestroy(&b->slvec0b));
1956   PetscCall(VecDestroy(&b->slvec1));
1957   PetscCall(VecDestroy(&b->slvec1a));
1958   PetscCall(VecDestroy(&b->slvec1b));
1959   PetscCall(VecScatterDestroy(&b->sMvctx));
1960 
1961   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1962   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1963   PetscCall(MatDestroy(&b->B));
1964   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1965   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
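       /* with a single process there is no off-process part, so b->B is created with zero columns */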
1966   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1967 
1968   if (!B->preallocated) {
1969     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1970     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1971     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1972     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1973   }
1974 
1975   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1976   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1977 
1978   B->preallocated  = PETSC_TRUE;
1979   B->was_assembled = PETSC_FALSE;
1980   B->assembled     = PETSC_FALSE;
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1985 {
1986   PetscInt        m, rstart, cend;
1987   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1988   const PetscInt *JJ          = NULL;
1989   PetscScalar    *values      = NULL;
1990   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1991   PetscBool       nooffprocentries;
1992 
1993   PetscFunctionBegin;
1994   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1995   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1996   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1997   PetscCall(PetscLayoutSetUp(B->rmap));
1998   PetscCall(PetscLayoutSetUp(B->cmap));
1999   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2000   m      = B->rmap->n / bs;
2001   rstart = B->rmap->rstart / bs;
2002   cend   = B->cmap->rend / bs;
2003 
2004   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2005   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2006   for (i = 0; i < m; i++) {
2007     nz = ii[i + 1] - ii[i];
2008     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2009     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2010     JJ = jj + ii[i];
2011     bd = 0;
2012     for (j = 0; j < nz; j++) {
2013       if (*JJ >= i + rstart) break;
2014       JJ++;
2015       bd++;
2016     }
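         /* bd counted the block columns strictly below the diagonal; only the upper triangle
            is stored, so these entries are excluded from both preallocation counts */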
2017     d = 0;
2018     for (; j < nz; j++) {
2019       if (*JJ++ >= cend) break;
2020       d++;
2021     }
2022     d_nnz[i] = d;
2023     o_nnz[i] = nz - d - bd;
2024     nz       = nz - bd;
2025     nz_max   = PetscMax(nz_max, nz);
2026   }
2027   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2028   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2029   PetscCall(PetscFree2(d_nnz, o_nnz));
2030 
2031   values = (PetscScalar *)V;
2032   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2033   for (i = 0; i < m; i++) {
2034     PetscInt        row   = i + rstart;
2035     PetscInt        ncols = ii[i + 1] - ii[i];
2036     const PetscInt *icols = jj + ii[i];
2037     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2038       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2039       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2040     } else { /* block ordering does not match so we can only insert one block at a time. */
2041       PetscInt j;
2042       for (j = 0; j < ncols; j++) {
2043         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2044         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2045       }
2046     }
2047   }
2048 
2049   if (!V) PetscCall(PetscFree(values));
2050   nooffprocentries    = B->nooffprocentries;
2051   B->nooffprocentries = PETSC_TRUE;
2052   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2053   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2054   B->nooffprocentries = nooffprocentries;
2055 
2056   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2057   PetscFunctionReturn(PETSC_SUCCESS);
2058 }
2059 
2060 /*MC
2061    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063    the matrix is stored.
2064 
2065    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2066    can call `MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE)`.
2067 
2068    Options Database Key:
2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2070 
2071    Note:
2072      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2073      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
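
        A minimal creation sketch (the sizes and preallocation counts below are illustrative only):
     .vb
        Mat A;
        MatCreate(PETSC_COMM_WORLD, &A);
        MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
        MatSetType(A, MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL);
     .ve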
2074 
2075    Level: beginner
2076 
2077 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2078 M*/
2079 
2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2081 {
2082   Mat_MPISBAIJ *b;
2083   PetscBool     flg = PETSC_FALSE;
2084 
2085   PetscFunctionBegin;
2086   PetscCall(PetscNew(&b));
2087   B->data = (void *)b;
2088   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2089 
2090   B->ops->destroy = MatDestroy_MPISBAIJ;
2091   B->ops->view    = MatView_MPISBAIJ;
2092   B->assembled    = PETSC_FALSE;
2093   B->insertmode   = NOT_SET_VALUES;
2094 
2095   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2096   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2097 
2098   /* build local table of row and column ownerships */
2099   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2100 
2101   /* build cache for off-process entries formed during assembly */
2102   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2103 
2104   b->donotstash  = PETSC_FALSE;
2105   b->colmap      = NULL;
2106   b->garray      = NULL;
2107   b->roworiented = PETSC_TRUE;
2108 
2109   /* stuff used in block assembly */
2110   b->barray = NULL;
2111 
2112   /* stuff used for matrix vector multiply */
2113   b->lvec    = NULL;
2114   b->Mvctx   = NULL;
2115   b->slvec0  = NULL;
2116   b->slvec0b = NULL;
2117   b->slvec1  = NULL;
2118   b->slvec1a = NULL;
2119   b->slvec1b = NULL;
2120   b->sMvctx  = NULL;
2121 
2122   /* stuff for MatGetRow() */
2123   b->rowindices   = NULL;
2124   b->rowvalues    = NULL;
2125   b->getrowactive = PETSC_FALSE;
2126 
2127   /* hash table stuff */
2128   b->ht           = NULL;
2129   b->hd           = NULL;
2130   b->ht_size      = 0;
2131   b->ht_flag      = PETSC_FALSE;
2132   b->ht_fact      = 0;
2133   b->ht_total_ct  = 0;
2134   b->ht_insert_ct = 0;
2135 
2136   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2137   b->ijonly = PETSC_FALSE;
2138 
2139   b->in_loc = NULL;
2140   b->v_loc  = NULL;
2141   b->n_loc  = 0;
2142 
2143   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2147 #if defined(PETSC_HAVE_ELEMENTAL)
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2149 #endif
2150 #if defined(PETSC_HAVE_SCALAPACK)
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2152 #endif
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2155 
2156   B->symmetric                   = PETSC_BOOL3_TRUE;
2157   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2158   B->symmetry_eternal            = PETSC_TRUE;
2159   B->structural_symmetry_eternal = PETSC_TRUE;
2160 #if defined(PETSC_USE_COMPLEX)
2161   B->hermitian = PETSC_BOOL3_FALSE;
2162 #else
2163   B->hermitian = PETSC_BOOL3_TRUE;
2164 #endif
2165 
2166   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2167   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2168   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2169   if (flg) {
2170     PetscReal fact = 1.39;
2171     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2172     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2173     if (fact <= 1.0) fact = 1.39;
2174     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2175     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2176   }
2177   PetscOptionsEnd();
2178   PetscFunctionReturn(PETSC_SUCCESS);
2179 }
2180 
2181 /*MC
2182    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2183 
2184    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2185    and `MATMPISBAIJ` otherwise.
2186 
2187    Options Database Key:
2188 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2189 
2190   Level: beginner
2191 
2192 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2193 M*/
2194 
2195 /*@C
2196    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2197    the user should preallocate the matrix storage by setting the parameters
2198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199    performance can be increased by more than a factor of 50.
2200 
2201    Collective
2202 
2203    Input Parameters:
2204 +  B - the matrix
2205 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2206           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2207 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2208            submatrix  (same for all local rows)
2209 .  d_nnz - array containing the number of block nonzeros in the various block rows
2210            in the upper triangular and diagonal part of the diagonal portion of the local
2211            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2212            for the diagonal entry and set a value even if it is zero.
2213 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2214            submatrix (same for all local rows).
2215 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2216            off-diagonal portion of the local submatrix that is right of the diagonal
2217            (possibly different for each block row) or NULL.
2218 
2219    Options Database Keys:
2220 +   -mat_no_unroll - uses code that does not unroll the loops in the
2221                      block calculations (much slower)
2222 -   -mat_block_size - size of the blocks to use
2223 
2224    Notes:
2225 
2226    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2227    then it must be used on all processors that share the object for that argument.
2228 
2229    If the *_nnz parameter is given then the *_nz parameter is ignored
2230 
2231    Storage Information:
2232    For a square global matrix we define each processor's diagonal portion
2233    to be its local rows and the corresponding columns (a square submatrix);
2234    each processor's off-diagonal portion encompasses the remainder of the
2235    local matrix (a rectangular submatrix).
2236 
2237    The user can specify preallocated storage for the diagonal part of
2238    the local submatrix with either d_nz or d_nnz (not both).  Set
2239    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2240    memory allocation.  Likewise, specify preallocated storage for the
2241    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2242 
2243    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2244    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2245    You can also run with the option -info and look for messages with the string
2246    malloc in them to see if additional memory allocation was needed.
2247 
2248    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2249    the figure below we depict these three local rows and all columns (0-11).
2250 
2251 .vb
2252            0 1 2 3 4 5 6 7 8 9 10 11
2253           --------------------------
2254    row 3  |. . . d d d o o o o  o  o
2255    row 4  |. . . d d d o o o o  o  o
2256    row 5  |. . . d d d o o o o  o  o
2257           --------------------------
2258 .ve
2259 
2260    Thus, any entries in the d locations are stored in the d (diagonal)
2261    submatrix, and any entries in the o locations are stored in the
2262    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2263    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2264 
2265    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2266    plus the diagonal part of the d matrix,
2267    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2268 
2269    In general, for PDE problems in which most nonzeros are near the diagonal,
2270    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2271    or you will get TERRIBLE performance; see the users' manual chapter on
2272    matrices.
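
        As an illustrative sketch for a process owning three block rows (the counts below
        are invented for the example), preallocation with per-block-row arrays looks like
     .vb
        PetscInt d_nnz[3] = {2, 1, 1}; /* upper-triangular block nonzeros in the diagonal part */
        PetscInt o_nnz[3] = {1, 1, 0}; /* block nonzeros in the off-diagonal part */
        MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz);
     .ve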
2273 
2274    Level: intermediate
2275 
2276 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2277 @*/
2278 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2279 {
2280   PetscFunctionBegin;
2281   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2282   PetscValidType(B, 1);
2283   PetscValidLogicalCollectiveInt(B, bs, 2);
2284   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2285   PetscFunctionReturn(PETSC_SUCCESS);
2286 }
2287 
2288 /*@C
2289    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2290    (block compressed row).  For good matrix assembly performance
2291    the user should preallocate the matrix storage by setting the parameters
2292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293    performance can be increased by more than a factor of 50.
2294 
2295    Collective
2296 
2297    Input Parameters:
2298 +  comm - MPI communicator
2299 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2300           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2301 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302            This value should be the same as the local size used in creating the
2303            y vector for the matrix-vector product y = Ax.
2304 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2305            This value should be the same as the local size used in creating the
2306            x vector for the matrix-vector product y = Ax.
2307 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2308 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2309 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2310            submatrix (same for all local rows)
2311 .  d_nnz - array containing the number of block nonzeros in the various block rows
2312            in the upper triangular portion of the diagonal portion of the local submatrix
2313            (possibly different for each block row) or NULL.
2314            If you plan to factor the matrix you must leave room for the diagonal entry and
2315            set its value even if it is zero.
2316 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2317            submatrix (same for all local rows).
2318 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2319            off-diagonal portion of the local submatrix (possibly different for
2320            each block row) or NULL.
2321 
2322    Output Parameter:
2323 .  A - the matrix
2324 
2325    Options Database Keys:
2326 +   -mat_no_unroll - uses code that does not unroll the loops in the
2327                      block calculations (much slower)
2328 .   -mat_block_size - size of the blocks to use
2329 -   -mat_mpi - use the parallel matrix data structures even on one processor
2330                (defaults to using SeqBAIJ format on one processor)
2331 
2332    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2333    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2334    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2335 
2336    Notes:
2337    The number of rows and columns must be divisible by blocksize.
2338    This matrix type does not support complex Hermitian operations.
2339 
2340    The user MUST specify either the local or global matrix dimensions
2341    (possibly both).
2342 
2343    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344    then it must be used on all processors that share the object for that argument.
2345 
2346    If the *_nnz parameter is given then the *_nz parameter is ignored
2347 
2348    Storage Information:
2349    For a square global matrix we define each processor's diagonal portion
2350    to be its local rows and the corresponding columns (a square submatrix);
2351    each processor's off-diagonal portion encompasses the remainder of the
2352    local matrix (a rectangular submatrix).
2353 
2354    The user can specify preallocated storage for the diagonal part of
2355    the local submatrix with either d_nz or d_nnz (not both). Set
2356    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2357    memory allocation. Likewise, specify preallocated storage for the
2358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2359 
2360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361    the figure below we depict these three local rows and all columns (0-11).
2362 
2363 .vb
2364            0 1 2 3 4 5 6 7 8 9 10 11
2365           --------------------------
2366    row 3  |. . . d d d o o o o  o  o
2367    row 4  |. . . d d d o o o o  o  o
2368    row 5  |. . . d d d o o o o  o  o
2369           --------------------------
2370 .ve
2371 
2372    Thus, any entries in the d locations are stored in the d (diagonal)
2373    submatrix, and any entries in the o locations are stored in the
2374    o (off-diagonal) submatrix. Note that the d matrix is stored in
2375    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2376 
2377    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2378    plus the diagonal part of the d matrix,
2379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2382    or you will get TERRIBLE performance; see the users' manual chapter on
2383    matrices.
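
        A call sequence sketch (all sizes below are illustrative):
     .vb
        Mat A;
        MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 200, 200, 3, NULL, 1, NULL, &A);
     .ve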
2384 
2385    Level: intermediate
2386 
2387 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2388 @*/
2389 
2390 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2391 {
2392   PetscMPIInt size;
2393 
2394   PetscFunctionBegin;
2395   PetscCall(MatCreate(comm, A));
2396   PetscCall(MatSetSizes(*A, m, n, M, N));
2397   PetscCallMPI(MPI_Comm_size(comm, &size));
2398   if (size > 1) {
2399     PetscCall(MatSetType(*A, MATMPISBAIJ));
2400     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2401   } else {
2402     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2403     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2404   }
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2409 {
2410   Mat           mat;
2411   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2412   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2413   PetscScalar  *array;
2414 
2415   PetscFunctionBegin;
2416   *newmat = NULL;
2417 
2418   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2419   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2420   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2421   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2422   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2423 
2424   mat->factortype   = matin->factortype;
2425   mat->preallocated = PETSC_TRUE;
2426   mat->assembled    = PETSC_TRUE;
2427   mat->insertmode   = NOT_SET_VALUES;
2428 
2429   a      = (Mat_MPISBAIJ *)mat->data;
2430   a->bs2 = oldmat->bs2;
2431   a->mbs = oldmat->mbs;
2432   a->nbs = oldmat->nbs;
2433   a->Mbs = oldmat->Mbs;
2434   a->Nbs = oldmat->Nbs;
2435 
2436   a->size         = oldmat->size;
2437   a->rank         = oldmat->rank;
2438   a->donotstash   = oldmat->donotstash;
2439   a->roworiented  = oldmat->roworiented;
2440   a->rowindices   = NULL;
2441   a->rowvalues    = NULL;
2442   a->getrowactive = PETSC_FALSE;
2443   a->barray       = NULL;
2444   a->rstartbs     = oldmat->rstartbs;
2445   a->rendbs       = oldmat->rendbs;
2446   a->cstartbs     = oldmat->cstartbs;
2447   a->cendbs       = oldmat->cendbs;
2448 
2449   /* hash table stuff */
2450   a->ht           = NULL;
2451   a->hd           = NULL;
2452   a->ht_size      = 0;
2453   a->ht_flag      = oldmat->ht_flag;
2454   a->ht_fact      = oldmat->ht_fact;
2455   a->ht_total_ct  = 0;
2456   a->ht_insert_ct = 0;
2457 
2458   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2459   if (oldmat->colmap) {
2460 #if defined(PETSC_USE_CTABLE)
2461     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2462 #else
2463     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2464     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2465 #endif
2466   } else a->colmap = NULL;
2467 
2468   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2469     PetscCall(PetscMalloc1(len, &a->garray));
2470     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2471   } else a->garray = NULL;
2472 
2473   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2474   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2475   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2476 
2477   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2478   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2479 
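       /* slvec1a and slvec1b are views of the local (first bs*mbs entries) and ghost
          (remaining nt - bs*mbs entries) pieces of slvec1's array; slvec0b is the
          analogous ghost view of slvec0 */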
2480   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2481   PetscCall(VecGetArray(a->slvec1, &array));
2482   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2483   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2484   PetscCall(VecRestoreArray(a->slvec1, &array));
2485   PetscCall(VecGetArray(a->slvec0, &array));
2486   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2487   PetscCall(VecRestoreArray(a->slvec0, &array));
2488 
2489   /* VecScatterCopy(oldmat->sMvctx, &a->sMvctx) is not written yet; instead take a reference to the existing scatter: */
2490   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2491   a->sMvctx = oldmat->sMvctx;
2492 
2493   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2494   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2495   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2496   *newmat = mat;
2497   PetscFunctionReturn(PETSC_SUCCESS);
2498 }
2499 
2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2502 
2503 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2504 {
2505   PetscBool isbinary;
2506 
2507   PetscFunctionBegin;
2508   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2509   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2510   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2511   PetscFunctionReturn(PETSC_SUCCESS);
2512 }
2513 
2514 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2515 {
2516   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2517   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2518   PetscReal     atmp;
2519   PetscReal    *work, *svalues, *rvalues;
2520   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2521   PetscMPIInt   rank, size;
2522   PetscInt     *rowners_bs, dest, count, source;
2523   PetscScalar  *va;
2524   MatScalar    *ba;
2525   MPI_Status    stat;
2526 
2527   PetscFunctionBegin;
2528   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning row indices is not supported; send email to petsc-maint@mcs.anl.gov");
2529   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2530   PetscCall(VecGetArray(v, &va));
2531 
2532   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2533   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2534 
2535   bs  = A->rmap->bs;
2536   mbs = a->mbs;
2537   Mbs = a->Mbs;
2538   ba  = b->a;
2539   bi  = b->i;
2540   bj  = b->j;
2541 
2542   /* find ownerships */
2543   rowners_bs = A->rmap->range;
2544 
2545   /* each proc creates an array to be distributed */
2546   PetscCall(PetscCalloc1(bs * Mbs, &work));
2547 
2548   /* row_max for B */
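       /* only the upper triangle is stored: entries of B lie in columns owned by higher
          ranks, so by symmetry they contribute to the row maxima of rows owned there;
          accumulate those contributions in work[] and send each segment to its owner
          (the last rank has nothing to send) */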
2549   if (rank != size - 1) {
2550     for (i = 0; i < mbs; i++) {
2551       ncols = bi[1] - bi[0];
2552       bi++;
2553       brow = bs * i;
2554       for (j = 0; j < ncols; j++) {
2555         bcol = bs * (*bj);
2556         for (kcol = 0; kcol < bs; kcol++) {
2557           col = bcol + kcol;           /* local col index */
2558           col += rowners_bs[rank + 1]; /* global col index */
2559           for (krow = 0; krow < bs; krow++) {
2560             atmp = PetscAbsScalar(*ba);
2561             ba++;
2562             row = brow + krow; /* local row index */
2563             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2564             if (work[col] < atmp) work[col] = atmp;
2565           }
2566         }
2567         bj++;
2568       }
2569     }
2570 
2571     /* send values to their owners */
2572     for (dest = rank + 1; dest < size; dest++) {
2573       svalues = work + rowners_bs[dest];
2574       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2575       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2576     }
2577   }
2578 
2579   /* receive values */
2580   if (rank) {
2581     rvalues = work;
2582     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2583     for (source = 0; source < rank; source++) {
2584       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2585       /* process values */
2586       for (i = 0; i < count; i++) {
2587         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2588       }
2589     }
2590   }
2591 
2592   PetscCall(VecRestoreArray(v, &va));
2593   PetscCall(PetscFree(work));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
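
/*
  Usage sketch (assumes an assembled MATMPISBAIJ matrix A): because idx must be
  NULL here, only the per-row maxima are returned, in a vector conforming to
  the rows of A.

    Vec v;

    PetscCall(MatCreateVecs(A, NULL, &v));
    PetscCall(MatGetRowMaxAbs(A, v, NULL));
    PetscCall(VecDestroy(&v));
*/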

PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(VecSet(mat->slvec1b, 0.0));

      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      /* slvec1a = bb + scale*D*xx, the same update as the hand-written loop below;
         the diagonal-block product must land in slvec1a (the previous code wrote it
         into bb1 and then scaled a stale slvec1a) */
      PetscCall(MatMultDiagonalBlock(matin, xx, mat->slvec1a));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b, 0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}
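
/*
  Usage sketch: this kernel is normally reached through the SOR preconditioner
  rather than called directly; A is an assembled MATMPISBAIJ matrix and b, x
  are conforming vectors.

    KSP ksp;
    PC  pc;

    PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
    PetscCall(KSPSetOperators(ksp, A, A));
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCSOR));
    PetscCall(PCSORSetSymmetric(pc, SOR_LOCAL_SYMMETRIC_SWEEP));
    PetscCall(KSPSolve(ksp, b, x));
    PetscCall(KSPDestroy(&ksp));
*/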

/*@
     MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
         in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

       The i and j indices are 0 based; the i indices are offsets into the local j array.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
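
/*
  Usage sketch on two MPI ranks (bs = 1): each rank supplies the upper triangle
  (including the diagonal) of its two local rows of the 4x4 tridiagonal matrix
  tridiag(-1, 2, -1). The arrays and the rank variable (from MPI_Comm_rank())
  are illustrative.

    Mat         A;
    PetscInt    i0[] = {0, 2, 4}, j0[] = {0, 1, 1, 2}; // rank 0: rows 0-1
    PetscScalar a0[] = {2, -1, 2, -1};
    PetscInt    i1[] = {0, 2, 3}, j1[] = {2, 3, 3};    // rank 1: rows 2-3
    PetscScalar a1[] = {2, -1, 2};

    if (rank == 0) PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 4, 4, i0, j0, a0, &A));
    else           PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 4, 4, i1, j1, a1, &A));
    PetscCall(MatDestroy(&A));
*/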

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
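
/*
  Usage sketch: the same effect as MatCreateMPISBAIJWithArrays(), with the
  creation steps spelled out; m, n, bs and the CSR arrays i, j, v (local
  upper-triangular rows) are placeholders.

    Mat B;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
    PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocationCSR(B, bs, i, j, v));
*/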

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
    PetscInt     *dnz, *onz, mbs, Nbs, nbs;
    PetscInt     *bindx, rmax = a->rmax, j;
    PetscMPIInt   rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size)); /* was MPI_Comm_rank(comm, &size), which left size equal to the rank and disabled the check below */
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
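
/*
  Usage sketch: this routine is normally reached through
  MatCreateMPIMatConcatenateSeqMat(), which stacks one sequential matrix per
  rank into a single parallel matrix; seqA stands for an assembled MATSEQSBAIJ
  matrix owned by each rank.

    Mat mpiA;

    PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seqA, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpiA));
*/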