1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
12 #endif
13 
14 /* This could be moved to matimpl.h */
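/* Preallocate B from the nonzero patterns of the nm matrices X[]; when symm[i] is set, the
   pattern of X[i] is also inserted transposed so that the symmetric counterpart of each stored
   entry is preallocated.  A rough usage sketch (mirroring the call made later in this file):

     PetscBool symm = PETSC_TRUE;
     PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
*/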
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat       preallocator;
18   PetscInt  r, rstart, rend;
19   PetscInt  bs, i, m, n, M, N;
20   PetscBool cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
24   PetscValidLogicalCollectiveInt(B, nm, 2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
27     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
28     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B, fill, 5);
31   PetscCall(MatGetBlockSize(B, &bs));
32   PetscCall(MatGetSize(B, &M, &N));
33   PetscCall(MatGetLocalSize(B, &m, &n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
35   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator, bs));
37   PetscCall(MatSetSizes(preallocator, m, n, M, N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt           ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
47       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
48       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
49       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
50     }
51   }
52   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
53   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
54   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
55   PetscCall(MatDestroy(&preallocator));
56   PetscFunctionReturn(0);
57 }
58 
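/* Convert the symmetric-storage matrix A (only the upper triangle of blocks is stored) to a
   full-storage matrix of type newtype by inserting each stored entry a(i,j) at both (i,j) and
   (j,i); for complex Hermitian matrices the mirrored entry is conjugated.  Dense target types
   skip the preallocation pass. */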
59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat      B;
62   PetscInt r;
63 
64   PetscFunctionBegin;
65   if (reuse != MAT_REUSE_MATRIX) {
66     PetscBool symm = PETSC_TRUE, isdense;
67     PetscInt  bs;
68 
69     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
70     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
71     PetscCall(MatSetType(B, newtype));
72     PetscCall(MatGetBlockSize(A, &bs));
73     PetscCall(MatSetBlockSize(B, bs));
74     PetscCall(PetscLayoutSetUp(B->rmap));
75     PetscCall(PetscLayoutSetUp(B->cmap));
76     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
77     if (!isdense) {
78       PetscCall(MatGetRowUpperTriangular(A));
79       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
80       PetscCall(MatRestoreRowUpperTriangular(A));
81     } else {
82       PetscCall(MatSetUp(B));
83     }
84   } else {
85     B = *newmat;
86     PetscCall(MatZeroEntries(B));
87   }
88 
89   PetscCall(MatGetRowUpperTriangular(A));
90   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
91     PetscInt           ncols;
92     const PetscInt    *row;
93     const PetscScalar *vals;
94 
95     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
96     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
97 #if defined(PETSC_USE_COMPLEX)
98     if (A->hermitian == PETSC_BOOL3_TRUE) {
99       PetscInt i;
100       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
101     } else {
102       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
103     }
104 #else
105     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
106 #endif
107     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
108   }
109   PetscCall(MatRestoreRowUpperTriangular(A));
110   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
111   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
112 
113   if (reuse == MAT_INPLACE_MATRIX) {
114     PetscCall(MatHeaderReplace(A, &B));
115   } else {
116     *newmat = B;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
122 {
123   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
124 
125   PetscFunctionBegin;
126   PetscCall(MatStoreValues(aij->A));
127   PetscCall(MatStoreValues(aij->B));
128   PetscFunctionReturn(0);
129 }
130 
131 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
132 {
133   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
134 
135   PetscFunctionBegin;
136   PetscCall(MatRetrieveValues(aij->A));
137   PetscCall(MatRetrieveValues(aij->B));
138   PetscFunctionReturn(0);
139 }
140 
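/* The two macros below insert a single scalar at point (row,col) of the local storage
   (block indices brow = row/bs, bcol = col/bs), into the diagonal part A or the off-diagonal
   part B respectively: the block row is searched (binary search narrowed to a short linear
   scan), the value is added to or inserted into an existing block, and otherwise a new block
   is created, reallocating the row with MatSeqXAIJReallocateAIJ() when no free slot remains. */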
141 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
142   { \
143     brow = row / bs; \
144     rp   = aj + ai[brow]; \
145     ap   = aa + bs2 * ai[brow]; \
146     rmax = aimax[brow]; \
147     nrow = ailen[brow]; \
148     bcol = col / bs; \
149     ridx = row % bs; \
150     cidx = col % bs; \
151     low  = 0; \
152     high = nrow; \
153     while (high - low > 3) { \
154       t = (low + high) / 2; \
155       if (rp[t] > bcol) high = t; \
156       else low = t; \
157     } \
158     for (_i = low; _i < high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2 * _i + bs * cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value; \
163         else *bap = value; \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
170     N = nrow++ - 1; \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
173     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
174     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
175     rp[_i]                          = bcol; \
176     ap[bs2 * _i + bs * cidx + ridx] = value; \
177     A->nonzerostate++; \
178   a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
183   { \
184     brow = row / bs; \
185     rp   = bj + bi[brow]; \
186     ap   = ba + bs2 * bi[brow]; \
187     rmax = bimax[brow]; \
188     nrow = bilen[brow]; \
189     bcol = col / bs; \
190     ridx = row % bs; \
191     cidx = col % bs; \
192     low  = 0; \
193     high = nrow; \
194     while (high - low > 3) { \
195       t = (low + high) / 2; \
196       if (rp[t] > bcol) high = t; \
197       else low = t; \
198     } \
199     for (_i = low; _i < high; _i++) { \
200       if (rp[_i] > bcol) break; \
201       if (rp[_i] == bcol) { \
202         bap = ap + bs2 * _i + bs * cidx + ridx; \
203         if (addv == ADD_VALUES) *bap += value; \
204         else *bap = value; \
205         goto b_noinsert; \
206       } \
207     } \
208     if (b->nonew == 1) goto b_noinsert; \
209     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
210     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
211     N = nrow++ - 1; \
212     /* shift up all the later entries in this row */ \
213     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
214     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
215     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
216     rp[_i]                          = bcol; \
217     ap[bs2 * _i + bs * cidx + ridx] = value; \
218     B->nonzerostate++; \
219   b_noinsert:; \
220     bilen[brow] = nrow; \
221   }
222 
223 /* Only add/insert a(i,j) with i<=j (blocks).
224    Any a(i,j) with i>j input by the user is ignored or generates an error
225 */
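/* Usage sketch (illustrative indices only): with global point indices i <= j one can do

     PetscCall(MatSetValues(mat, 1, &i, 1, &j, &val, ADD_VALUES));

   entries whose block row exceeds their block column are silently dropped only after
   MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE) or -mat_ignore_lower_triangular. */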
226 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
227 {
228   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
229   MatScalar     value;
230   PetscBool     roworiented = baij->roworiented;
231   PetscInt      i, j, row, col;
232   PetscInt      rstart_orig = mat->rmap->rstart;
233   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
234   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
235 
236   /* Some variables required in the macros below */
237   Mat           A     = baij->A;
238   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
239   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
240   MatScalar    *aa = a->a;
241 
242   Mat          B     = baij->B;
243   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
244   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
245   MatScalar   *ba = b->a;
246 
247   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
248   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
249   MatScalar *ap, *bap;
250 
251   /* for stash */
252   PetscInt   n_loc, *in_loc = NULL;
253   MatScalar *v_loc = NULL;
254 
255   PetscFunctionBegin;
256   if (!baij->donotstash) {
257     if (n > baij->n_loc) {
258       PetscCall(PetscFree(baij->in_loc));
259       PetscCall(PetscFree(baij->v_loc));
260       PetscCall(PetscMalloc1(n, &baij->in_loc));
261       PetscCall(PetscMalloc1(n, &baij->v_loc));
262 
263       baij->n_loc = n;
264     }
265     in_loc = baij->in_loc;
266     v_loc  = baij->v_loc;
267   }
268 
269   for (i = 0; i < m; i++) {
270     if (im[i] < 0) continue;
271     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
272     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273       row = im[i] - rstart_orig;                     /* local row index */
274       for (j = 0; j < n; j++) {
275         if (im[i] / bs > in[j] / bs) {
276           if (a->ignore_ltriangular) {
277             continue; /* ignore lower triangular blocks */
278         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
279         }
280         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
281           col  = in[j] - cstart_orig;                    /* local col index */
282           brow = row / bs;
283           bcol = col / bs;
284           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
285           if (roworiented) value = v[i * n + j];
286           else value = v[i + j * m];
287           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
288           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
289         } else if (in[j] < 0) {
290           continue;
291         } else {
292           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
293           /* off-diag entry (B) */
294           if (mat->was_assembled) {
295             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
296 #if defined(PETSC_USE_CTABLE)
297             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
298             col = col - 1;
299 #else
300             col = baij->colmap[in[j] / bs] - 1;
301 #endif
302             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
303               PetscCall(MatDisAssemble_MPISBAIJ(mat));
304               col = in[j];
305               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
306               B     = baij->B;
307               b     = (Mat_SeqBAIJ *)(B)->data;
308               bimax = b->imax;
309               bi    = b->i;
310               bilen = b->ilen;
311               bj    = b->j;
312               ba    = b->a;
313             } else col += in[j] % bs;
314           } else col = in[j];
315           if (roworiented) value = v[i * n + j];
316           else value = v[i + j * m];
317           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
318           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
319         }
320       }
321     } else { /* off processor entry */
322       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
323       if (!baij->donotstash) {
324         mat->assembled = PETSC_FALSE;
325         n_loc          = 0;
326         for (j = 0; j < n; j++) {
327           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
328           in_loc[n_loc] = in[j];
329           if (roworiented) {
330             v_loc[n_loc] = v[i * n + j];
331           } else {
332             v_loc[n_loc] = v[j * m + i];
333           }
334           n_loc++;
335         }
336         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
337       }
338     }
339   }
340   PetscFunctionReturn(0);
341 }
342 
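/* Insert one bs x bs block of values at block position (row,col) of the local SeqSBAIJ matrix,
   honoring the row/column orientation of v; blocks with col < row are ignored (or generate an
   error) since only the upper triangle is stored. */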
343 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
344 {
345   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
346   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
347   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
348   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
349   PetscBool          roworiented = a->roworiented;
350   const PetscScalar *value       = v;
351   MatScalar         *ap, *aa = a->a, *bap;
352 
353   PetscFunctionBegin;
354   if (col < row) {
355     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
356     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
357   }
358   rp    = aj + ai[row];
359   ap    = aa + bs2 * ai[row];
360   rmax  = imax[row];
361   nrow  = ailen[row];
362   value = v;
363   low   = 0;
364   high  = nrow;
365 
366   while (high - low > 7) {
367     t = (low + high) / 2;
368     if (rp[t] > col) high = t;
369     else low = t;
370   }
371   for (i = low; i < high; i++) {
372     if (rp[i] > col) break;
373     if (rp[i] == col) {
374       bap = ap + bs2 * i;
375       if (roworiented) {
376         if (is == ADD_VALUES) {
377           for (ii = 0; ii < bs; ii++) {
378             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
379           }
380         } else {
381           for (ii = 0; ii < bs; ii++) {
382             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
383           }
384         }
385       } else {
386         if (is == ADD_VALUES) {
387           for (ii = 0; ii < bs; ii++) {
388             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
389           }
390         } else {
391           for (ii = 0; ii < bs; ii++) {
392             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
393           }
394         }
395       }
396       goto noinsert2;
397     }
398   }
399   if (nonew == 1) goto noinsert2;
400   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
401   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
402   N = nrow++ - 1;
403   high++;
404   /* shift up all the later entries in this row */
405   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
406   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
407   rp[i] = col;
408   bap   = ap + bs2 * i;
409   if (roworiented) {
410     for (ii = 0; ii < bs; ii++) {
411       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
412     }
413   } else {
414     for (ii = 0; ii < bs; ii++) {
415       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
416     }
417   }
418 noinsert2:;
419   ailen[row] = nrow;
420   PetscFunctionReturn(0);
421 }
422 
423 /*
424    This routine is exactly duplicated in mpibaij.c
425 */
426 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
427 {
428   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
429   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
430   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
431   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
432   PetscBool          roworiented = a->roworiented;
433   const PetscScalar *value       = v;
434   MatScalar         *ap, *aa = a->a, *bap;
435 
436   PetscFunctionBegin;
437   rp    = aj + ai[row];
438   ap    = aa + bs2 * ai[row];
439   rmax  = imax[row];
440   nrow  = ailen[row];
441   low   = 0;
442   high  = nrow;
443   value = v;
444   while (high - low > 7) {
445     t = (low + high) / 2;
446     if (rp[t] > col) high = t;
447     else low = t;
448   }
449   for (i = low; i < high; i++) {
450     if (rp[i] > col) break;
451     if (rp[i] == col) {
452       bap = ap + bs2 * i;
453       if (roworiented) {
454         if (is == ADD_VALUES) {
455           for (ii = 0; ii < bs; ii++) {
456             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
457           }
458         } else {
459           for (ii = 0; ii < bs; ii++) {
460             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
461           }
462         }
463       } else {
464         if (is == ADD_VALUES) {
465           for (ii = 0; ii < bs; ii++, value += bs) {
466             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
467             bap += bs;
468           }
469         } else {
470           for (ii = 0; ii < bs; ii++, value += bs) {
471             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
472             bap += bs;
473           }
474         }
475       }
476       goto noinsert2;
477     }
478   }
479   if (nonew == 1) goto noinsert2;
480   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
481   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
482   N = nrow++ - 1;
483   high++;
484   /* shift up all the later entries in this row */
485   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
486   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
487   rp[i] = col;
488   bap   = ap + bs2 * i;
489   if (roworiented) {
490     for (ii = 0; ii < bs; ii++) {
491       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
492     }
493   } else {
494     for (ii = 0; ii < bs; ii++) {
495       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
496     }
497   }
498 noinsert2:;
499   ailen[row] = nrow;
500   PetscFunctionReturn(0);
501 }
502 
503 /*
504     This routine could be optimized by removing the need for the block copy below and passing stride information
505   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
506 */
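/* Usage sketch (illustrative names only): with block row ib <= block column jb and a
   row-oriented bs*bs array blk of values,

     PetscCall(MatSetValuesBlocked(mat, 1, &ib, 1, &jb, blk, ADD_VALUES));

   blocks with ib > jb are ignored only when MAT_IGNORE_LOWER_TRIANGULAR has been set. */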
507 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
508 {
509   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
510   const MatScalar *value;
511   MatScalar       *barray      = baij->barray;
512   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
513   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
514   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
515   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
516 
517   PetscFunctionBegin;
518   if (!barray) {
519     PetscCall(PetscMalloc1(bs2, &barray));
520     baij->barray = barray;
521   }
522 
523   if (roworiented) {
524     stepval = (n - 1) * bs;
525   } else {
526     stepval = (m - 1) * bs;
527   }
528   for (i = 0; i < m; i++) {
529     if (im[i] < 0) continue;
530     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
531     if (im[i] >= rstart && im[i] < rend) {
532       row = im[i] - rstart;
533       for (j = 0; j < n; j++) {
534         if (im[i] > in[j]) {
535           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
536           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
537         }
538         /* If only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
539         if ((roworiented) && (n == 1)) {
540           barray = (MatScalar *)v + i * bs2;
541         } else if ((!roworiented) && (m == 1)) {
542           barray = (MatScalar *)v + j * bs2;
543         } else { /* Here a copy is required */
544           if (roworiented) {
545             value = v + i * (stepval + bs) * bs + j * bs;
546           } else {
547             value = v + j * (stepval + bs) * bs + i * bs;
548           }
549           for (ii = 0; ii < bs; ii++, value += stepval) {
550             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
551           }
552           barray -= bs2;
553         }
554 
555         if (in[j] >= cstart && in[j] < cend) {
556           col = in[j] - cstart;
557           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
558         } else if (in[j] < 0) {
559           continue;
560         } else {
561           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
562           if (mat->was_assembled) {
563             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
564 
565 #if defined(PETSC_USE_DEBUG)
566   #if defined(PETSC_USE_CTABLE)
567             {
568               PetscInt data;
569               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
570               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
571             }
572   #else
573             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
574   #endif
575 #endif
576 #if defined(PETSC_USE_CTABLE)
577             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
578             col = (col - 1) / bs;
579 #else
580             col = (baij->colmap[in[j]] - 1) / bs;
581 #endif
582             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
583               PetscCall(MatDisAssemble_MPISBAIJ(mat));
584               col = in[j];
585             }
586           } else col = in[j];
587           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
588         }
589       }
590     } else {
591       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
592       if (!baij->donotstash) {
593         if (roworiented) {
594           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
595         } else {
596           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
597         }
598       }
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
605 {
606   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
607   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
608   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
609 
610   PetscFunctionBegin;
611   for (i = 0; i < m; i++) {
612     if (idxm[i] < 0) continue; /* negative row */
613     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
614     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
615       row = idxm[i] - bsrstart;
616       for (j = 0; j < n; j++) {
617         if (idxn[j] < 0) continue; /* negative column */
618         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
619         if (idxn[j] >= bscstart && idxn[j] < bscend) {
620           col = idxn[j] - bscstart;
621           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
622         } else {
623           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624 #if defined(PETSC_USE_CTABLE)
625           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
626           data--;
627 #else
628           data = baij->colmap[idxn[j] / bs] - 1;
629 #endif
630           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
631           else {
632             col = data + idxn[j] % bs;
633             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
634           }
635         }
636       }
637     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
638   }
639   PetscFunctionReturn(0);
640 }
641 
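/* Frobenius norm: since each stored off-diagonal block of the upper triangle also represents
   its mirrored lower-triangular counterpart, the global value is sqrt(||A_d||_F^2 + 2*||B||_F^2)
   summed over all processes; the 1- and infinity-norms likewise accumulate each off-diagonal
   entry into both its row sum and its column sum. */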
642 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
643 {
644   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
645   PetscReal     sum[2], *lnorm2;
646 
647   PetscFunctionBegin;
648   if (baij->size == 1) {
649     PetscCall(MatNorm(baij->A, type, norm));
650   } else {
651     if (type == NORM_FROBENIUS) {
652       PetscCall(PetscMalloc1(2, &lnorm2));
653       PetscCall(MatNorm(baij->A, type, lnorm2));
654       *lnorm2 = (*lnorm2) * (*lnorm2);
655       lnorm2++; /* square of norm(A) */
656       PetscCall(MatNorm(baij->B, type, lnorm2));
657       *lnorm2 = (*lnorm2) * (*lnorm2);
658       lnorm2--; /* square of norm(B) */
659       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
660       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
661       PetscCall(PetscFree(lnorm2));
662     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
663       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
664       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
665       PetscReal    *rsum, *rsum2, vabs;
666       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
667       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
668       MatScalar    *v;
669 
670       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
671       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
672       /* Amat */
673       v  = amat->a;
674       jj = amat->j;
675       for (brow = 0; brow < mbs; brow++) {
676         grow = bs * (rstart + brow);
677         nz   = amat->i[brow + 1] - amat->i[brow];
678         for (bcol = 0; bcol < nz; bcol++) {
679           gcol = bs * (rstart + *jj);
680           jj++;
681           for (col = 0; col < bs; col++) {
682             for (row = 0; row < bs; row++) {
683               vabs = PetscAbsScalar(*v);
684               v++;
685               rsum[gcol + col] += vabs;
686               /* non-diagonal block */
687               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
688             }
689           }
690         }
691         PetscCall(PetscLogFlops(nz * bs * bs));
692       }
693       /* Bmat */
694       v  = bmat->a;
695       jj = bmat->j;
696       for (brow = 0; brow < mbs; brow++) {
697         grow = bs * (rstart + brow);
698         nz   = bmat->i[brow + 1] - bmat->i[brow];
699         for (bcol = 0; bcol < nz; bcol++) {
700           gcol = bs * garray[*jj];
701           jj++;
702           for (col = 0; col < bs; col++) {
703             for (row = 0; row < bs; row++) {
704               vabs = PetscAbsScalar(*v);
705               v++;
706               rsum[gcol + col] += vabs;
707               rsum[grow + row] += vabs;
708             }
709           }
710         }
711         PetscCall(PetscLogFlops(nz * bs * bs));
712       }
713       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
714       *norm = 0.0;
715       for (col = 0; col < mat->cmap->N; col++) {
716         if (rsum2[col] > *norm) *norm = rsum2[col];
717       }
718       PetscCall(PetscFree2(rsum, rsum2));
719     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
723 
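/* Assembly begin: scatter the stashed off-process point entries (mat->stash) and block entries
   (mat->bstash) to their owning processes; the matching receives are drained in
   MatAssemblyEnd_MPISBAIJ(). */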
724 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
725 {
726   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
727   PetscInt      nstash, reallocs;
728 
729   PetscFunctionBegin;
730   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
731 
732   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
733   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
734   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
735   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
736   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
737   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
738   PetscFunctionReturn(0);
739 }
740 
741 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
744   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
745   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
746   PetscInt     *row, *col;
747   PetscBool     other_disassembled;
748   PetscMPIInt   n;
749   PetscBool     r1, r2, r3;
750   MatScalar    *val;
751 
752   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
753   PetscFunctionBegin;
754   if (!baij->donotstash && !mat->nooffprocentries) {
755     while (1) {
756       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
757       if (!flg) break;
758 
759       for (i = 0; i < n;) {
760         /* Now identify the consecutive vals belonging to the same row */
761         for (j = i, rstart = row[j]; j < n; j++) {
762           if (row[j] != rstart) break;
763         }
764         if (j < n) ncols = j - i;
765         else ncols = n - i;
766         /* Now assemble all these values with a single function call */
767         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
768         i = j;
769       }
770     }
771     PetscCall(MatStashScatterEnd_Private(&mat->stash));
772     /* Now process the block-stash. Since the values are stashed column-oriented,
773        temporarily clear the roworiented flags (column-oriented insertion), and restore the
774        original flags after the calls to MatSetValuesBlocked_MPISBAIJ() */
775     r1 = baij->roworiented;
776     r2 = a->roworiented;
777     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
778 
779     baij->roworiented = PETSC_FALSE;
780     a->roworiented    = PETSC_FALSE;
781 
782     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
783     while (1) {
784       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
785       if (!flg) break;
786 
787       for (i = 0; i < n;) {
788         /* Now identify the consecutive vals belonging to the same row */
789         for (j = i, rstart = row[j]; j < n; j++) {
790           if (row[j] != rstart) break;
791         }
792         if (j < n) ncols = j - i;
793         else ncols = n - i;
794         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
795         i = j;
796       }
797     }
798     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
799 
800     baij->roworiented = r1;
801     a->roworiented    = r2;
802 
803     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
804   }
805 
806   PetscCall(MatAssemblyBegin(baij->A, mode));
807   PetscCall(MatAssemblyEnd(baij->A, mode));
808 
809   /* determine if any processor has disassembled; if so, we must
810      also disassemble ourselves so that we can reassemble. */
811   /*
812      if nonzero structure of submatrix B cannot change then we know that
813      no processor disassembled thus we can skip this stuff
814   */
815   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
816     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
817     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
818   }
819 
820   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
821   PetscCall(MatAssemblyBegin(baij->B, mode));
822   PetscCall(MatAssemblyEnd(baij->B, mode));
823 
824   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
825 
826   baij->rowvalues = NULL;
827 
828   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
829   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
830     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
831     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
837 #include <petscdraw.h>
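/* ASCII/draw/socket viewing: for the info formats each process reports its local data;
   otherwise the entire matrix is assembled onto rank 0 as a temporary MATMPISBAIJ and its
   sequential diagonal part is viewed through a sub-viewer. */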
838 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
839 {
840   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
841   PetscInt          bs   = mat->rmap->bs;
842   PetscMPIInt       rank = baij->rank;
843   PetscBool         iascii, isdraw;
844   PetscViewer       sviewer;
845   PetscViewerFormat format;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
850   if (iascii) {
851     PetscCall(PetscViewerGetFormat(viewer, &format));
852     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
853       MatInfo info;
854       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
855       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
856       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
857       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
858                                                    mat->rmap->bs, (double)info.memory));
859       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
860       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
861       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
862       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
863       PetscCall(PetscViewerFlush(viewer));
864       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
865       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
866       PetscCall(VecScatterView(baij->Mvctx, viewer));
867       PetscFunctionReturn(0);
868     } else if (format == PETSC_VIEWER_ASCII_INFO) {
869       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
870       PetscFunctionReturn(0);
871     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
872       PetscFunctionReturn(0);
873     }
874   }
875 
876   if (isdraw) {
877     PetscDraw draw;
878     PetscBool isnull;
879     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
880     PetscCall(PetscDrawIsNull(draw, &isnull));
881     if (isnull) PetscFunctionReturn(0);
882   }
883 
884   {
885     /* assemble the entire matrix onto first processor. */
886     Mat           A;
887     Mat_SeqSBAIJ *Aloc;
888     Mat_SeqBAIJ  *Bloc;
889     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
890     MatScalar    *a;
891     const char   *matname;
892 
893     /* Should this be the same type as mat? */
894     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
895     if (rank == 0) {
896       PetscCall(MatSetSizes(A, M, N, M, N));
897     } else {
898       PetscCall(MatSetSizes(A, 0, 0, M, N));
899     }
900     PetscCall(MatSetType(A, MATMPISBAIJ));
901     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
902     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
903 
904     /* copy over the A part */
905     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
906     ai   = Aloc->i;
907     aj   = Aloc->j;
908     a    = Aloc->a;
909     PetscCall(PetscMalloc1(bs, &rvals));
910 
911     for (i = 0; i < mbs; i++) {
912       rvals[0] = bs * (baij->rstartbs + i);
913       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
914       for (j = ai[i]; j < ai[i + 1]; j++) {
915         col = (baij->cstartbs + aj[j]) * bs;
916         for (k = 0; k < bs; k++) {
917           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
918           col++;
919           a += bs;
920         }
921       }
922     }
923     /* copy over the B part */
924     Bloc = (Mat_SeqBAIJ *)baij->B->data;
925     ai   = Bloc->i;
926     aj   = Bloc->j;
927     a    = Bloc->a;
928     for (i = 0; i < mbs; i++) {
929       rvals[0] = bs * (baij->rstartbs + i);
930       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
931       for (j = ai[i]; j < ai[i + 1]; j++) {
932         col = baij->garray[aj[j]] * bs;
933         for (k = 0; k < bs; k++) {
934           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
935           col++;
936           a += bs;
937         }
938       }
939     }
940     PetscCall(PetscFree(rvals));
941     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
942     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
943     /*
944        Everyone has to call to draw the matrix since the graphics waits are
945        synchronized across all processors that share the PetscDraw object
946     */
947     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
948     PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
949     if (rank == 0) {
950       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
951       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
952     }
953     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
954     PetscCall(PetscViewerFlush(viewer));
955     PetscCall(MatDestroy(&A));
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 /* Used for both MPIBAIJ and MPISBAIJ matrices */
961 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
962 
963 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
964 {
965   PetscBool iascii, isdraw, issocket, isbinary;
966 
967   PetscFunctionBegin;
968   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
969   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
970   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
971   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
972   if (iascii || isdraw || issocket) {
973     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
974   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
975   PetscFunctionReturn(0);
976 }
977 
978 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
979 {
980   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
981 
982   PetscFunctionBegin;
983 #if defined(PETSC_USE_LOG)
984   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
985 #endif
986   PetscCall(MatStashDestroy_Private(&mat->stash));
987   PetscCall(MatStashDestroy_Private(&mat->bstash));
988   PetscCall(MatDestroy(&baij->A));
989   PetscCall(MatDestroy(&baij->B));
990 #if defined(PETSC_USE_CTABLE)
991   PetscCall(PetscHMapIDestroy(&baij->colmap));
992 #else
993   PetscCall(PetscFree(baij->colmap));
994 #endif
995   PetscCall(PetscFree(baij->garray));
996   PetscCall(VecDestroy(&baij->lvec));
997   PetscCall(VecScatterDestroy(&baij->Mvctx));
998   PetscCall(VecDestroy(&baij->slvec0));
999   PetscCall(VecDestroy(&baij->slvec0b));
1000   PetscCall(VecDestroy(&baij->slvec1));
1001   PetscCall(VecDestroy(&baij->slvec1a));
1002   PetscCall(VecDestroy(&baij->slvec1b));
1003   PetscCall(VecScatterDestroy(&baij->sMvctx));
1004   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
1005   PetscCall(PetscFree(baij->barray));
1006   PetscCall(PetscFree(baij->hd));
1007   PetscCall(VecDestroy(&baij->diag));
1008   PetscCall(VecDestroy(&baij->bb1));
1009   PetscCall(VecDestroy(&baij->xx1));
1010 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1011   PetscCall(PetscFree(baij->setvaluescopy));
1012 #endif
1013   PetscCall(PetscFree(baij->in_loc));
1014   PetscCall(PetscFree(baij->v_loc));
1015   PetscCall(PetscFree(baij->rangebs));
1016   PetscCall(PetscFree(mat->data));
1017 
1018   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
1023 #if defined(PETSC_HAVE_ELEMENTAL)
1024   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
1025 #endif
1026 #if defined(PETSC_HAVE_SCALAPACK)
1027   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
1028 #endif
1029   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
1030   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
1031   PetscFunctionReturn(0);
1032 }
1033 
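/* y = A*x with only the upper triangle of blocks stored locally: slvec1a gets A_d*x from the
   diagonal block, slvec0b gets B^H*x (the contributions the stored upper blocks make to rows
   owned elsewhere), and the combined scatter sMvctx simultaneously sends those contributions
   to their owners and gathers the ghost values of x into slvec1b; finally
   y = B*slvec1b + slvec1a adds the superdiagonal part. */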
1034 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1035 {
1036   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1037   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1038   PetscScalar       *from;
1039   const PetscScalar *x;
1040 
1041   PetscFunctionBegin;
1042   /* diagonal part */
1043   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1044   PetscCall(VecSet(a->slvec1b, 0.0));
1045 
1046   /* subdiagonal part */
1047   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1048   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1049 
1050   /* copy x into the vec slvec0 */
1051   PetscCall(VecGetArray(a->slvec0, &from));
1052   PetscCall(VecGetArrayRead(xx, &x));
1053 
1054   PetscCall(PetscArraycpy(from, x, bs * mbs));
1055   PetscCall(VecRestoreArray(a->slvec0, &from));
1056   PetscCall(VecRestoreArrayRead(xx, &x));
1057 
1058   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1059   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1060   /* superdiagonal part */
1061   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1066 {
1067   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1068   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1069   PetscScalar       *from;
1070   const PetscScalar *x;
1071 
1072   PetscFunctionBegin;
1073   /* diagonal part */
1074   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1075   PetscCall(VecSet(a->slvec1b, 0.0));
1076 
1077   /* subdiagonal part */
1078   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1079 
1080   /* copy x into the vec slvec0 */
1081   PetscCall(VecGetArray(a->slvec0, &from));
1082   PetscCall(VecGetArrayRead(xx, &x));
1083 
1084   PetscCall(PetscArraycpy(from, x, bs * mbs));
1085   PetscCall(VecRestoreArray(a->slvec0, &from));
1086   PetscCall(VecRestoreArrayRead(xx, &x));
1087 
1088   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1089   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1090   /* superdiagonal part */
1091   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1096 {
1097   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1098   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1099   PetscScalar       *from, zero       = 0.0;
1100   const PetscScalar *x;
1101 
1102   PetscFunctionBegin;
1103   /* diagonal part */
1104   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1105   PetscCall(VecSet(a->slvec1b, zero));
1106 
1107   /* subdiagonal part */
1108   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1109   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1110 
1111   /* copy x into the vec slvec0 */
1112   PetscCall(VecGetArray(a->slvec0, &from));
1113   PetscCall(VecGetArrayRead(xx, &x));
1114   PetscCall(PetscArraycpy(from, x, bs * mbs));
1115   PetscCall(VecRestoreArray(a->slvec0, &from));
1116 
1117   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1118   PetscCall(VecRestoreArrayRead(xx, &x));
1119   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1120 
1121   /* superdiagonal part */
1122   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1127 {
1128   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1129   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1130   PetscScalar       *from, zero       = 0.0;
1131   const PetscScalar *x;
1132 
1133   PetscFunctionBegin;
1134   /* diagonal part */
1135   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1136   PetscCall(VecSet(a->slvec1b, zero));
1137 
1138   /* subdiagonal part */
1139   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1140 
1141   /* copy x into the vec slvec0 */
1142   PetscCall(VecGetArray(a->slvec0, &from));
1143   PetscCall(VecGetArrayRead(xx, &x));
1144   PetscCall(PetscArraycpy(from, x, bs * mbs));
1145   PetscCall(VecRestoreArray(a->slvec0, &from));
1146 
1147   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1148   PetscCall(VecRestoreArrayRead(xx, &x));
1149   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1150 
1151   /* superdiagonal part */
1152   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 /*
1157   This only works correctly for square matrices where the subblock A->A is the
1158    diagonal block
1159 */
1160 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1161 {
1162   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1163 
1164   PetscFunctionBegin;
1165   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1166   PetscCall(MatGetDiagonal(a->A, v));
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1171 {
1172   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(MatScale(a->A, aa));
1176   PetscCall(MatScale(a->B, aa));
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1181 {
1182   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1183   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1184   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1185   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1186   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1187 
1188   PetscFunctionBegin;
1189   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1190   mat->getrowactive = PETSC_TRUE;
1191 
1192   if (!mat->rowvalues && (idx || v)) {
1193     /*
1194         allocate enough space to hold information from the longest row.
1195     */
1196     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1197     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1198     PetscInt      max = 1, mbs = mat->mbs, tmp;
1199     for (i = 0; i < mbs; i++) {
1200       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1201       if (max < tmp) max = tmp;
1202     }
1203     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1204   }
1205 
1206   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1207   lrow = row - brstart; /* local row index */
1208 
1209   pvA = &vworkA;
1210   pcA = &cworkA;
1211   pvB = &vworkB;
1212   pcB = &cworkB;
1213   if (!v) {
1214     pvA = NULL;
1215     pvB = NULL;
1216   }
1217   if (!idx) {
1218     pcA = NULL;
1219     if (!v) pcB = NULL;
1220   }
1221   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1222   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1223   nztot = nzA + nzB;
1224 
1225   cmap = mat->garray;
1226   if (v || idx) {
1227     if (nztot) {
1228       /* Sort by increasing column numbers, assuming A and B already sorted */
1229       PetscInt imark = -1;
1230       if (v) {
1231         *v = v_p = mat->rowvalues;
1232         for (i = 0; i < nzB; i++) {
1233           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1234           else break;
1235         }
1236         imark = i;
1237         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1238         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1239       }
1240       if (idx) {
1241         *idx = idx_p = mat->rowindices;
1242         if (imark > -1) {
1243           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1244         } else {
1245           for (i = 0; i < nzB; i++) {
1246             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1247             else break;
1248           }
1249           imark = i;
1250         }
1251         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1252         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253       }
1254     } else {
1255       if (idx) *idx = NULL;
1256       if (v) *v = NULL;
1257     }
1258   }
1259   *nz = nztot;
1260   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1261   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1266 {
1267   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1268 
1269   PetscFunctionBegin;
1270   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1271   baij->getrowactive = PETSC_FALSE;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1276 {
1277   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279 
1280   PetscFunctionBegin;
1281   aA->getrow_utriangular = PETSC_TRUE;
1282   PetscFunctionReturn(0);
1283 }
1284 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285 {
1286   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1287   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1288 
1289   PetscFunctionBegin;
1290   aA->getrow_utriangular = PETSC_FALSE;
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1295 {
1296   PetscFunctionBegin;
1297   if (PetscDefined(USE_COMPLEX)) {
1298     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1299 
1300     PetscCall(MatConjugate(a->A));
1301     PetscCall(MatConjugate(a->B));
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1307 {
1308   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1309 
1310   PetscFunctionBegin;
1311   PetscCall(MatRealPart(a->A));
1312   PetscCall(MatRealPart(a->B));
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatImaginaryPart(a->A));
1322   PetscCall(MatImaginaryPart(a->B));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1327    Input: isrow       - distributed (parallel),
1328           iscol_local - locally owned (seq)
1329 */
1330 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1331 {
1332   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1333   const PetscInt *ptr1, *ptr2;
1334 
1335   PetscFunctionBegin;
1336   PetscCall(ISGetLocalSize(isrow, &sz1));
1337   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1338   if (sz1 > sz2) {
1339     *flg = PETSC_FALSE;
1340     PetscFunctionReturn(0);
1341   }
1342 
1343   PetscCall(ISGetIndices(isrow, &ptr1));
1344   PetscCall(ISGetIndices(iscol_local, &ptr2));
1345 
1346   PetscCall(PetscMalloc1(sz1, &a1));
1347   PetscCall(PetscMalloc1(sz2, &a2));
1348   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1349   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1350   PetscCall(PetscSortInt(sz1, a1));
1351   PetscCall(PetscSortInt(sz2, a2));
1352 
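  /* With both index sets sorted, a single forward pass counts how many entries of isrow also occur in
     iscol_local; isrow is contained in iscol_local exactly when every entry is matched (nmatch == sz1) */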
1353   nmatch = 0;
1354   k      = 0;
1355   for (i = 0; i < sz1; i++) {
1356     for (j = k; j < sz2; j++) {
1357       if (a1[i] == a2[j]) {
1358         k = j;
1359         nmatch++;
1360         break;
1361       }
1362     }
1363   }
1364   PetscCall(ISRestoreIndices(isrow, &ptr1));
1365   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1366   PetscCall(PetscFree(a1));
1367   PetscCall(PetscFree(a2));
1368   if (nmatch < sz1) {
1369     *flg = PETSC_FALSE;
1370   } else {
1371     *flg = PETSC_TRUE;
1372   }
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1377 {
1378   IS        iscol_local;
1379   PetscInt  csize;
1380   PetscBool isequal;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(ISGetLocalSize(iscol, &csize));
1384   if (call == MAT_REUSE_MATRIX) {
1385     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1386     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1387   } else {
1388     PetscBool issorted;
1389 
1390     PetscCall(ISAllGather(iscol, &iscol_local));
1391     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1392     PetscCall(ISSorted(iscol_local, &issorted));
1393     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1394   }
1395 
1396   /* now call MatCreateSubMatrix_MPIBAIJ() */
1397   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1398   if (call == MAT_INITIAL_MATRIX) {
1399     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1400     PetscCall(ISDestroy(&iscol_local));
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1406 {
1407   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1408 
1409   PetscFunctionBegin;
1410   PetscCall(MatZeroEntries(l->A));
1411   PetscCall(MatZeroEntries(l->B));
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1416 {
1417   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1418   Mat            A = a->A, B = a->B;
1419   PetscLogDouble isend[5], irecv[5];
1420 
1421   PetscFunctionBegin;
1422   info->block_size = (PetscReal)matin->rmap->bs;
1423 
1424   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1425 
1426   isend[0] = info->nz_used;
1427   isend[1] = info->nz_allocated;
1428   isend[2] = info->nz_unneeded;
1429   isend[3] = info->memory;
1430   isend[4] = info->mallocs;
1431 
1432   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1433 
1434   isend[0] += info->nz_used;
1435   isend[1] += info->nz_allocated;
1436   isend[2] += info->nz_unneeded;
1437   isend[3] += info->memory;
1438   isend[4] += info->mallocs;
1439   if (flag == MAT_LOCAL) {
1440     info->nz_used      = isend[0];
1441     info->nz_allocated = isend[1];
1442     info->nz_unneeded  = isend[2];
1443     info->memory       = isend[3];
1444     info->mallocs      = isend[4];
1445   } else if (flag == MAT_GLOBAL_MAX) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else if (flag == MAT_GLOBAL_SUM) {
1454     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1455 
1456     info->nz_used      = irecv[0];
1457     info->nz_allocated = irecv[1];
1458     info->nz_unneeded  = irecv[2];
1459     info->memory       = irecv[3];
1460     info->mallocs      = irecv[4];
1461   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1462   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463   info->fill_ratio_needed = 0;
1464   info->factor_mallocs    = 0;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1469 {
1470   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1471   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1472 
1473   PetscFunctionBegin;
1474   switch (op) {
1475   case MAT_NEW_NONZERO_LOCATIONS:
1476   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1477   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478   case MAT_KEEP_NONZERO_PATTERN:
1479   case MAT_SUBMAT_SINGLEIS:
1480   case MAT_NEW_NONZERO_LOCATION_ERR:
1481     MatCheckPreallocated(A, 1);
1482     PetscCall(MatSetOption(a->A, op, flg));
1483     PetscCall(MatSetOption(a->B, op, flg));
1484     break;
1485   case MAT_ROW_ORIENTED:
1486     MatCheckPreallocated(A, 1);
1487     a->roworiented = flg;
1488 
1489     PetscCall(MatSetOption(a->A, op, flg));
1490     PetscCall(MatSetOption(a->B, op, flg));
1491     break;
1492   case MAT_FORCE_DIAGONAL_ENTRIES:
1493   case MAT_SORTED_FULL:
1494     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1495     break;
1496   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497     a->donotstash = flg;
1498     break;
1499   case MAT_USE_HASH_TABLE:
1500     a->ht_flag = flg;
1501     break;
1502   case MAT_HERMITIAN:
1503     MatCheckPreallocated(A, 1);
1504     PetscCall(MatSetOption(a->A, op, flg));
1505 #if defined(PETSC_USE_COMPLEX)
1506     if (flg) { /* need different mat-vec ops */
1507       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509       A->ops->multtranspose    = NULL;
1510       A->ops->multtransposeadd = NULL;
1511       A->symmetric             = PETSC_BOOL3_FALSE;
1512     }
1513 #endif
1514     break;
1515   case MAT_SPD:
1516   case MAT_SYMMETRIC:
1517     MatCheckPreallocated(A, 1);
1518     PetscCall(MatSetOption(a->A, op, flg));
1519 #if defined(PETSC_USE_COMPLEX)
1520     if (flg) { /* restore to use default mat-vec ops */
1521       A->ops->mult             = MatMult_MPISBAIJ;
1522       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525     }
1526 #endif
1527     break;
1528   case MAT_STRUCTURALLY_SYMMETRIC:
1529     MatCheckPreallocated(A, 1);
1530     PetscCall(MatSetOption(a->A, op, flg));
1531     break;
1532   case MAT_SYMMETRY_ETERNAL:
1533   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1534     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1535     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1536     break;
1537   case MAT_SPD_ETERNAL:
1538     break;
1539   case MAT_IGNORE_LOWER_TRIANGULAR:
1540     aA->ignore_ltriangular = flg;
1541     break;
1542   case MAT_ERROR_LOWER_TRIANGULAR:
1543     aA->ignore_ltriangular = flg;
1544     break;
1545   case MAT_GETROW_UPPERTRIANGULAR:
1546     aA->getrow_utriangular = flg;
1547     break;
1548   default:
1549     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1555 {
1556   PetscFunctionBegin;
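  /* An SBAIJ matrix represents a symmetric matrix, so its transpose equals the matrix itself and a duplicate (or copy) suffices */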
1557   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1558   if (reuse == MAT_INITIAL_MATRIX) {
1559     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1560   } else if (reuse == MAT_REUSE_MATRIX) {
1561     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1567 {
1568   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1569   Mat           a = baij->A, b = baij->B;
1570   PetscInt      nv, m, n;
1571   PetscBool     flg;
1572 
1573   PetscFunctionBegin;
1574   if (ll != rr) {
1575     PetscCall(VecEqual(ll, rr, &flg));
1576     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1577   }
1578   if (!ll) PetscFunctionReturn(0);
1579 
1580   PetscCall(MatGetLocalSize(mat, &m, &n));
1581   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1582 
1583   PetscCall(VecGetLocalSize(rr, &nv));
1584   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1585 
1586   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1587 
1588   /* left diagonalscale the off-diagonal part */
1589   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1590 
1591   /* scale the diagonal part */
1592   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1593 
1594   /* right diagonalscale the off-diagonal part */
1595   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601 {
1602   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(MatSetUnfactored(a->A));
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1610 
1611 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1612 {
1613   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1614   Mat           a, b, c, d;
1615   PetscBool     flg;
1616 
1617   PetscFunctionBegin;
1618   a = matA->A;
1619   b = matA->B;
1620   c = matB->A;
1621   d = matB->B;
1622 
1623   PetscCall(MatEqual(a, c, &flg));
1624   if (flg) PetscCall(MatEqual(b, d, &flg));
1625   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1630 {
1631   PetscBool isbaij;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1635   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1636   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1637   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1638     PetscCall(MatGetRowUpperTriangular(A));
1639     PetscCall(MatCopy_Basic(A, B, str));
1640     PetscCall(MatRestoreRowUpperTriangular(A));
1641   } else {
1642     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1643     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1644 
1645     PetscCall(MatCopy(a->A, b->A, str));
1646     PetscCall(MatCopy(a->B, b->B, str));
1647   }
1648   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1653 {
1654   PetscFunctionBegin;
1655   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1660 {
1661   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1662   PetscBLASInt  bnz, one                          = 1;
1663   Mat_SeqSBAIJ *xa, *ya;
1664   Mat_SeqBAIJ  *xb, *yb;
1665 
1666   PetscFunctionBegin;
1667   if (str == SAME_NONZERO_PATTERN) {
1668     PetscScalar alpha = a;
1669     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1670     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1671     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1672     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1673     xb = (Mat_SeqBAIJ *)xx->B->data;
1674     yb = (Mat_SeqBAIJ *)yy->B->data;
1675     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1676     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1677     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1678   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1679     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1680     PetscCall(MatAXPY_Basic(Y, a, X, str));
1681     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1682   } else {
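    /* Differing nonzero patterns: preallocate a new MATMPISBAIJ for the union of the patterns of X and Y,
       compute the sum into it, then replace Y with it in place via MatHeaderMerge() */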
1683     Mat       B;
1684     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1685     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1686     PetscCall(MatGetRowUpperTriangular(X));
1687     PetscCall(MatGetRowUpperTriangular(Y));
1688     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1689     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1690     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1691     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1692     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1693     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1694     PetscCall(MatSetType(B, MATMPISBAIJ));
1695     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1696     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1697     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1698     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1699     PetscCall(MatHeaderMerge(Y, &B));
1700     PetscCall(PetscFree(nnz_d));
1701     PetscCall(PetscFree(nnz_o));
1702     PetscCall(MatRestoreRowUpperTriangular(X));
1703     PetscCall(MatRestoreRowUpperTriangular(Y));
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1709 {
1710   PetscInt  i;
1711   PetscBool flg;
1712 
1713   PetscFunctionBegin;
1714   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1715   for (i = 0; i < n; i++) {
1716     PetscCall(ISEqual(irow[i], icol[i], &flg));
1717     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1723 {
1724   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1725   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1726 
1727   PetscFunctionBegin;
1728   if (!Y->preallocated) {
1729     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1730   } else if (!aij->nz) {
1731     PetscInt nonew = aij->nonew;
1732     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1733     aij->nonew = nonew;
1734   }
1735   PetscCall(MatShift_Basic(Y, a));
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1740 {
1741   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1742 
1743   PetscFunctionBegin;
1744   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1745   PetscCall(MatMissingDiagonal(a->A, missing, d));
1746   if (d) {
1747     PetscInt rstart;
1748     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1749     *d += rstart / A->rmap->bs;
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1755 {
1756   PetscFunctionBegin;
1757   *a = ((Mat_MPISBAIJ *)A->data)->A;
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 /* -------------------------------------------------------------------*/
1762 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1763                                        MatGetRow_MPISBAIJ,
1764                                        MatRestoreRow_MPISBAIJ,
1765                                        MatMult_MPISBAIJ,
1766                                        /*  4*/ MatMultAdd_MPISBAIJ,
1767                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1768                                        MatMultAdd_MPISBAIJ,
1769                                        NULL,
1770                                        NULL,
1771                                        NULL,
1772                                        /* 10*/ NULL,
1773                                        NULL,
1774                                        NULL,
1775                                        MatSOR_MPISBAIJ,
1776                                        MatTranspose_MPISBAIJ,
1777                                        /* 15*/ MatGetInfo_MPISBAIJ,
1778                                        MatEqual_MPISBAIJ,
1779                                        MatGetDiagonal_MPISBAIJ,
1780                                        MatDiagonalScale_MPISBAIJ,
1781                                        MatNorm_MPISBAIJ,
1782                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1783                                        MatAssemblyEnd_MPISBAIJ,
1784                                        MatSetOption_MPISBAIJ,
1785                                        MatZeroEntries_MPISBAIJ,
1786                                        /* 24*/ NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        NULL,
1791                                        /* 29*/ MatSetUp_MPISBAIJ,
1792                                        NULL,
1793                                        NULL,
1794                                        MatGetDiagonalBlock_MPISBAIJ,
1795                                        NULL,
1796                                        /* 34*/ MatDuplicate_MPISBAIJ,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        /* 39*/ MatAXPY_MPISBAIJ,
1802                                        MatCreateSubMatrices_MPISBAIJ,
1803                                        MatIncreaseOverlap_MPISBAIJ,
1804                                        MatGetValues_MPISBAIJ,
1805                                        MatCopy_MPISBAIJ,
1806                                        /* 44*/ NULL,
1807                                        MatScale_MPISBAIJ,
1808                                        MatShift_MPISBAIJ,
1809                                        NULL,
1810                                        NULL,
1811                                        /* 49*/ NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        NULL,
1816                                        /* 54*/ NULL,
1817                                        NULL,
1818                                        MatSetUnfactored_MPISBAIJ,
1819                                        NULL,
1820                                        MatSetValuesBlocked_MPISBAIJ,
1821                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        NULL,
1826                                        /* 64*/ NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1832                                        NULL,
1833                                        MatConvert_MPISBAIJ_Basic,
1834                                        NULL,
1835                                        NULL,
1836                                        /* 74*/ NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        /* 79*/ NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        MatLoad_MPISBAIJ,
1846                                        /* 84*/ NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        /* 89*/ NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        /* 94*/ NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        NULL,
1861                                        /* 99*/ NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        MatConjugate_MPISBAIJ,
1865                                        NULL,
1866                                        /*104*/ NULL,
1867                                        MatRealPart_MPISBAIJ,
1868                                        MatImaginaryPart_MPISBAIJ,
1869                                        MatGetRowUpperTriangular_MPISBAIJ,
1870                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1871                                        /*109*/ NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        MatMissingDiagonal_MPISBAIJ,
1876                                        /*114*/ NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        /*119*/ NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        /*124*/ NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        /*129*/ NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        /*134*/ NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        /*139*/ MatSetBlockSizes_Default,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        /*150*/ NULL,
1913                                        NULL};
1914 
1915 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1916 {
1917   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1918   PetscInt      i, mbs, Mbs;
1919   PetscMPIInt   size;
1920 
1921   PetscFunctionBegin;
1922   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1923   PetscCall(PetscLayoutSetUp(B->rmap));
1924   PetscCall(PetscLayoutSetUp(B->cmap));
1925   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1926   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1927   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1928 
1929   mbs = B->rmap->n / bs;
1930   Mbs = B->rmap->N / bs;
1931   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1932 
1933   B->rmap->bs = bs;
1934   b->bs2      = bs * bs;
1935   b->mbs      = mbs;
1936   b->Mbs      = Mbs;
1937   b->nbs      = B->cmap->n / bs;
1938   b->Nbs      = B->cmap->N / bs;
1939 
1940   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1941   b->rstartbs = B->rmap->rstart / bs;
1942   b->rendbs   = B->rmap->rend / bs;
1943 
1944   b->cstartbs = B->cmap->rstart / bs;
1945   b->cendbs   = B->cmap->rend / bs;
1946 
1947 #if defined(PETSC_USE_CTABLE)
1948   PetscCall(PetscHMapIDestroy(&b->colmap));
1949 #else
1950   PetscCall(PetscFree(b->colmap));
1951 #endif
1952   PetscCall(PetscFree(b->garray));
1953   PetscCall(VecDestroy(&b->lvec));
1954   PetscCall(VecScatterDestroy(&b->Mvctx));
1955   PetscCall(VecDestroy(&b->slvec0));
1956   PetscCall(VecDestroy(&b->slvec0b));
1957   PetscCall(VecDestroy(&b->slvec1));
1958   PetscCall(VecDestroy(&b->slvec1a));
1959   PetscCall(VecDestroy(&b->slvec1b));
1960   PetscCall(VecScatterDestroy(&b->sMvctx));
1961 
1962   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1963   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1964   PetscCall(MatDestroy(&b->B));
1965   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1966   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1967   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1968 
1969   if (!B->preallocated) {
1970     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1971     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1972     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1973     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1974   }
1975 
1976   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1977   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1978 
1979   B->preallocated  = PETSC_TRUE;
1980   B->was_assembled = PETSC_FALSE;
1981   B->assembled     = PETSC_FALSE;
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1986 {
1987   PetscInt        m, rstart, cend;
1988   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1989   const PetscInt *JJ          = NULL;
1990   PetscScalar    *values      = NULL;
1991   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1992   PetscBool       nooffprocentries;
1993 
1994   PetscFunctionBegin;
1995   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1996   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1997   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1998   PetscCall(PetscLayoutSetUp(B->rmap));
1999   PetscCall(PetscLayoutSetUp(B->cmap));
2000   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2001   m      = B->rmap->n / bs;
2002   rstart = B->rmap->rstart / bs;
2003   cend   = B->cmap->rend / bs;
2004 
2005   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2006   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2007   for (i = 0; i < m; i++) {
2008     nz = ii[i + 1] - ii[i];
2009     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2010     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
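    /* bd counts block columns below the diagonal (dropped, since only the upper triangle is stored),
       d counts block columns at or above the diagonal inside the diagonal block, and the remainder
       (o_nnz[i] = nz - d - bd) lies to the right of the diagonal block */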
2011     JJ = jj + ii[i];
2012     bd = 0;
2013     for (j = 0; j < nz; j++) {
2014       if (*JJ >= i + rstart) break;
2015       JJ++;
2016       bd++;
2017     }
2018     d = 0;
2019     for (; j < nz; j++) {
2020       if (*JJ++ >= cend) break;
2021       d++;
2022     }
2023     d_nnz[i] = d;
2024     o_nnz[i] = nz - d - bd;
2025     nz       = nz - bd;
2026     nz_max   = PetscMax(nz_max, nz);
2027   }
2028   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2029   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2030   PetscCall(PetscFree2(d_nnz, o_nnz));
2031 
2032   values = (PetscScalar *)V;
2033   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2034   for (i = 0; i < m; i++) {
2035     PetscInt        row   = i + rstart;
2036     PetscInt        ncols = ii[i + 1] - ii[i];
2037     const PetscInt *icols = jj + ii[i];
2038     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2039       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2040       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2041     } else { /* block ordering does not match so we can only insert one block at a time. */
2042       PetscInt j;
2043       for (j = 0; j < ncols; j++) {
2044         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2045         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2046       }
2047     }
2048   }
2049 
2050   if (!V) PetscCall(PetscFree(values));
2051   nooffprocentries    = B->nooffprocentries;
2052   B->nooffprocentries = PETSC_TRUE;
2053   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2054   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2055   B->nooffprocentries = nooffprocentries;
2056 
2057   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 /*MC
2062    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2063    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2064    the matrix is stored.
2065 
2066    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2067    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2068 
2069    Options Database Keys:
2070 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2071 
2072    Note:
2073      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2074      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2075 
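   Example Usage:
   A minimal sketch of assembling a `MATMPISBAIJ` matrix; the sizes, block size, and preallocation counts below are
   illustrative only, and only entries with column index >= row index are set since just the upper triangle is stored.
.vb
    Mat A;
    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
    MatSetType(A, MATMPISBAIJ);
    MatMPISBAIJSetPreallocation(A, 1, 5, NULL, 2, NULL);
    MatSetValue(A, 0, 2, 1.0, INSERT_VALUES);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve
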
2076    Level: beginner
2077 
2078 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2079 M*/
2080 
2081 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2082 {
2083   Mat_MPISBAIJ *b;
2084   PetscBool     flg = PETSC_FALSE;
2085 
2086   PetscFunctionBegin;
2087   PetscCall(PetscNew(&b));
2088   B->data = (void *)b;
2089   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2090 
2091   B->ops->destroy = MatDestroy_MPISBAIJ;
2092   B->ops->view    = MatView_MPISBAIJ;
2093   B->assembled    = PETSC_FALSE;
2094   B->insertmode   = NOT_SET_VALUES;
2095 
2096   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2097   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2098 
2099   /* build local table of row and column ownerships */
2100   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2101 
2102   /* build cache for off array entries formed */
2103   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2104 
2105   b->donotstash  = PETSC_FALSE;
2106   b->colmap      = NULL;
2107   b->garray      = NULL;
2108   b->roworiented = PETSC_TRUE;
2109 
2110   /* stuff used in block assembly */
2111   b->barray = NULL;
2112 
2113   /* stuff used for matrix vector multiply */
2114   b->lvec    = NULL;
2115   b->Mvctx   = NULL;
2116   b->slvec0  = NULL;
2117   b->slvec0b = NULL;
2118   b->slvec1  = NULL;
2119   b->slvec1a = NULL;
2120   b->slvec1b = NULL;
2121   b->sMvctx  = NULL;
2122 
2123   /* stuff for MatGetRow() */
2124   b->rowindices   = NULL;
2125   b->rowvalues    = NULL;
2126   b->getrowactive = PETSC_FALSE;
2127 
2128   /* hash table stuff */
2129   b->ht           = NULL;
2130   b->hd           = NULL;
2131   b->ht_size      = 0;
2132   b->ht_flag      = PETSC_FALSE;
2133   b->ht_fact      = 0;
2134   b->ht_total_ct  = 0;
2135   b->ht_insert_ct = 0;
2136 
2137   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2138   b->ijonly = PETSC_FALSE;
2139 
2140   b->in_loc = NULL;
2141   b->v_loc  = NULL;
2142   b->n_loc  = 0;
2143 
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2148 #if defined(PETSC_HAVE_ELEMENTAL)
2149   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2150 #endif
2151 #if defined(PETSC_HAVE_SCALAPACK)
2152   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2153 #endif
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2155   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2156 
2157   B->symmetric                   = PETSC_BOOL3_TRUE;
2158   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2159   B->symmetry_eternal            = PETSC_TRUE;
2160   B->structural_symmetry_eternal = PETSC_TRUE;
2161 #if defined(PETSC_USE_COMPLEX)
2162   B->hermitian = PETSC_BOOL3_FALSE;
2163 #else
2164   B->hermitian = PETSC_BOOL3_TRUE;
2165 #endif
2166 
2167   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2168   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2169   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2170   if (flg) {
2171     PetscReal fact = 1.39;
2172     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2173     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2174     if (fact <= 1.0) fact = 1.39;
2175     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2176     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2177   }
2178   PetscOptionsEnd();
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 /*MC
2183    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2184 
2185    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2186    and `MATMPISBAIJ` otherwise.
2187 
2188    Options Database Key:
2189 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to `MatSetFromOptions()`
2190 
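   Example Usage:
   A minimal sketch of runtime type selection (run with -mat_type sbaij; on one process this yields `MATSEQSBAIJ`,
   otherwise `MATMPISBAIJ`; the sizes are illustrative):
.vb
    Mat A;
    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
    MatSetFromOptions(A);
    MatSetUp(A);
.ve
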
2191   Level: beginner
2192 
2193 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2194 M*/
2195 
2196 /*@C
2197    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2198    the user should preallocate the matrix storage by setting the parameters
2199    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2200    performance can be increased by more than a factor of 50.
2201 
2202    Collective
2203 
2204    Input Parameters:
2205 +  B - the matrix
2206 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2207           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2208 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2209            submatrix  (same for all local rows)
2210 .  d_nnz - array containing the number of block nonzeros in the various block rows
2211            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2212            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2213            for the diagonal entry and set a value even if it is zero.
2214 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2215            submatrix (same for all local rows).
2216 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2217            off-diagonal portion of the local submatrix that is right of the diagonal
2218            (possibly different for each block row) or NULL.
2219 
2220    Options Database Keys:
2221 +   -mat_no_unroll - uses code that does not unroll the loops in the
2222                      block calculations (much slower)
2223 -   -mat_block_size - size of the blocks to use
2224 
2225    Notes:
2226 
2227    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2228    then it must be used on all processors that share the object for that argument.
2229 
2230    If the *_nnz parameter is given then the *_nz parameter is ignored
2231 
2232    Storage Information:
2233    For a square global matrix we define each processor's diagonal portion
2234    to be its local rows and the corresponding columns (a square submatrix);
2235    each processor's off-diagonal portion encompasses the remainder of the
2236    local matrix (a rectangular submatrix).
2237 
2238    The user can specify preallocated storage for the diagonal part of
2239    the local submatrix with either d_nz or d_nnz (not both).  Set
2240    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2241    memory allocation.  Likewise, specify preallocated storage for the
2242    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2243 
2244    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2245    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
2246    You can also run with the option -info and look for messages with the string
2247    malloc in them to see if additional memory allocation was needed.
2248 
2249    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2250    the figure below we depict these three local rows and all columns (0-11).
2251 
2252 .vb
2253            0 1 2 3 4 5 6 7 8 9 10 11
2254           --------------------------
2255    row 3  |. . . d d d o o o o  o  o
2256    row 4  |. . . d d d o o o o  o  o
2257    row 5  |. . . d d d o o o o  o  o
2258           --------------------------
2259 .ve
2260 
2261    Thus, any entries in the d locations are stored in the d (diagonal)
2262    submatrix, and any entries in the o locations are stored in the
2263    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2264    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2265 
2266    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2267    plus the diagonal part of the d matrix,
2268    and o_nz should indicate the number of block nonzeros per row in the o matrix
2269 
2270    In general, for PDE problems in which most nonzeros are near the diagonal,
2271    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2272    or you will get TERRIBLE performance; see the users' manual chapter on
2273    matrices.
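
   Example:
   For the three local rows depicted above with bs = 1, assuming every marked position is a nonzero (a sketch with
   illustrative counts), the upper-triangular part of the d block holds 3, 2 and 1 entries in rows 3, 4 and 5, and
   the o block holds 6 entries per row, so one could preallocate with
.vb
    PetscInt d_nnz[] = {3, 2, 1};
    PetscInt o_nnz[] = {6, 6, 6};
    MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve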
2274 
2275    Level: intermediate
2276 
2277 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2278 @*/
2279 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2280 {
2281   PetscFunctionBegin;
2282   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2283   PetscValidType(B, 1);
2284   PetscValidLogicalCollectiveInt(B, bs, 2);
2285   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 /*@C
2290    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2291    (block compressed row).  For good matrix assembly performance
2292    the user should preallocate the matrix storage by setting the parameters
2293    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294    performance can be increased by more than a factor of 50.
2295 
2296    Collective
2297 
2298    Input Parameters:
2299 +  comm - MPI communicator
2300 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2301           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2302 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2303            This value should be the same as the local size used in creating the
2304            y vector for the matrix-vector product y = Ax.
2305 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2306            This value should be the same as the local size used in creating the
2307            x vector for the matrix-vector product y = Ax.
2308 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2309 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2310 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2311            submatrix  (same for all local rows)
2312 .  d_nnz - array containing the number of block nonzeros in the various block rows
2313            in the upper triangular portion of the diagonal portion of the local submatrix
2314            (possibly different for each block row) or NULL.
2315            If you plan to factor the matrix you must leave room for the diagonal entry and
2316            set its value even if it is zero.
2317 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2318            submatrix (same for all local rows).
2319 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2320            off-diagonal portion of the local submatrix (possibly different for
2321            each block row) or NULL.
2322 
2323    Output Parameter:
2324 .  A - the matrix
2325 
2326    Options Database Keys:
2327 +   -mat_no_unroll - uses code that does not unroll the loops in the
2328                      block calculations (much slower)
2329 .   -mat_block_size - size of the blocks to use
2330 -   -mat_mpi - use the parallel matrix data structures even on one processor
2331                (defaults to using SeqSBAIJ format on one processor)
2332 
2333    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2334    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2335    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2336 
2337    Notes:
2338    The number of rows and columns must be divisible by blocksize.
2339    This matrix type does not support complex Hermitian operations.
2340 
2341    The user MUST specify either the local or global matrix dimensions
2342    (possibly both).
2343 
2344    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2345    then it must be used on all processors that share the object for that argument.
2346 
2347    If the *_nnz parameter is given then the *_nz parameter is ignored
2348 
2349    Storage Information:
2350    For a square global matrix we define each processor's diagonal portion
2351    to be its local rows and the corresponding columns (a square submatrix);
2352    each processor's off-diagonal portion encompasses the remainder of the
2353    local matrix (a rectangular submatrix).
2354 
2355    The user can specify preallocated storage for the diagonal part of
2356    the local submatrix with either d_nz or d_nnz (not both).  Set
2357    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2358    memory allocation.  Likewise, specify preallocated storage for the
2359    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2360 
2361    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2362    the figure below we depict these three local rows and all columns (0-11).
2363 
2364 .vb
2365            0 1 2 3 4 5 6 7 8 9 10 11
2366           --------------------------
2367    row 3  |. . . d d d o o o o  o  o
2368    row 4  |. . . d d d o o o o  o  o
2369    row 5  |. . . d d d o o o o  o  o
2370           --------------------------
2371 .ve
2372 
2373    Thus, any entries in the d locations are stored in the d (diagonal)
2374    submatrix, and any entries in the o locations are stored in the
2375    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2376    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2377 
2378    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2379    plus the diagonal part of the d matrix,
2380    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2381    In general, for PDE problems in which most nonzeros are near the diagonal,
2382    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2383    or you will get TERRIBLE performance; see the users' manual chapter on
2384    matrices.
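
   Example:
   A minimal sketch of the direct call (the block size, global size, and preallocation counts are illustrative only):
.vb
    Mat A;
    MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A);
.ve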
2385 
2386    Level: intermediate
2387 
2388 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2389 @*/
2390 
2391 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2392 {
2393   PetscMPIInt size;
2394 
2395   PetscFunctionBegin;
2396   PetscCall(MatCreate(comm, A));
2397   PetscCall(MatSetSizes(*A, m, n, M, N));
2398   PetscCallMPI(MPI_Comm_size(comm, &size));
2399   if (size > 1) {
2400     PetscCall(MatSetType(*A, MATMPISBAIJ));
2401     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2402   } else {
2403     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2404     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2405   }
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2410 {
2411   Mat           mat;
2412   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2413   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2414   PetscScalar  *array;
2415 
2416   PetscFunctionBegin;
2417   *newmat = NULL;
2418 
2419   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2420   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2421   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2422   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2423   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2424 
2425   mat->factortype   = matin->factortype;
2426   mat->preallocated = PETSC_TRUE;
2427   mat->assembled    = PETSC_TRUE;
2428   mat->insertmode   = NOT_SET_VALUES;
2429 
2430   a      = (Mat_MPISBAIJ *)mat->data;
2431   a->bs2 = oldmat->bs2;
2432   a->mbs = oldmat->mbs;
2433   a->nbs = oldmat->nbs;
2434   a->Mbs = oldmat->Mbs;
2435   a->Nbs = oldmat->Nbs;
2436 
2437   a->size         = oldmat->size;
2438   a->rank         = oldmat->rank;
2439   a->donotstash   = oldmat->donotstash;
2440   a->roworiented  = oldmat->roworiented;
2441   a->rowindices   = NULL;
2442   a->rowvalues    = NULL;
2443   a->getrowactive = PETSC_FALSE;
2444   a->barray       = NULL;
2445   a->rstartbs     = oldmat->rstartbs;
2446   a->rendbs       = oldmat->rendbs;
2447   a->cstartbs     = oldmat->cstartbs;
2448   a->cendbs       = oldmat->cendbs;
2449 
2450   /* hash table stuff */
2451   a->ht           = NULL;
2452   a->hd           = NULL;
2453   a->ht_size      = 0;
2454   a->ht_flag      = oldmat->ht_flag;
2455   a->ht_fact      = oldmat->ht_fact;
2456   a->ht_total_ct  = 0;
2457   a->ht_insert_ct = 0;
2458 
2459   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2460   if (oldmat->colmap) {
2461 #if defined(PETSC_USE_CTABLE)
2462     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2463 #else
2464     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2465     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2466 #endif
2467   } else a->colmap = NULL;
2468 
2469   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2470     PetscCall(PetscMalloc1(len, &a->garray));
2471     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2472   } else a->garray = NULL;
2473 
2474   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2475   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2476   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2477 
2478   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2479   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2480 
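  /* slvec1a/slvec1b and slvec0b are not independent vectors: they are views into the arrays of slvec1 and
     slvec0, with the first bs*mbs entries holding the local part and the remainder the ghost part */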
2481   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2482   PetscCall(VecGetArray(a->slvec1, &array));
2483   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2484   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2485   PetscCall(VecRestoreArray(a->slvec1, &array));
2486   PetscCall(VecGetArray(a->slvec0, &array));
2487   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2488   PetscCall(VecRestoreArray(a->slvec0, &array));
2489 
2490   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2491   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2492   a->sMvctx = oldmat->sMvctx;
2493 
2494   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2495   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2496   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2497   *newmat = mat;
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2502 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2503 
2504 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2505 {
2506   PetscBool isbinary;
2507 
2508   PetscFunctionBegin;
2509   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2510   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2511   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2516 {
2517   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2518   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2519   PetscReal     atmp;
2520   PetscReal    *work, *svalues, *rvalues;
2521   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2522   PetscMPIInt   rank, size;
2523   PetscInt     *rowners_bs, dest, count, source;
2524   PetscScalar  *va;
2525   MatScalar    *ba;
2526   MPI_Status    stat;
2527 
2528   PetscFunctionBegin;
2529   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning row indices (idx) is not supported for this matrix type; send email to petsc-maint@mcs.anl.gov");
2530   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2531   PetscCall(VecGetArray(v, &va));
2532 
2533   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2534   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2535 
2536   bs  = A->rmap->bs;
2537   mbs = a->mbs;
2538   Mbs = a->Mbs;
2539   ba  = b->a;
2540   bi  = b->i;
2541   bj  = b->j;
2542 
2543   /* find ownerships */
2544   rowners_bs = A->rmap->range;
2545 
2546   /* each proc creates an array to be distributed */
2547   PetscCall(PetscCalloc1(bs * Mbs, &work));
2548 
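  /* Only the upper triangle of the global matrix is stored, so an entry of the off-diagonal block B in local
     row 'row' and global column 'col' also contributes, by symmetry, to global row 'col', which is owned by a
     higher-ranked process.  Those column-wise maxima are accumulated in 'work' and sent to the owning
     processes below, where they are folded into the local row maxima. */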
2549   /* row_max for B */
2550   if (rank != size - 1) {
2551     for (i = 0; i < mbs; i++) {
2552       ncols = bi[1] - bi[0];
2553       bi++;
2554       brow = bs * i;
2555       for (j = 0; j < ncols; j++) {
2556         bcol = bs * (*bj);
2557         for (kcol = 0; kcol < bs; kcol++) {
2558           col = bcol + kcol;           /* local col index */
2559           col += rowners_bs[rank + 1]; /* global col index */
2560           for (krow = 0; krow < bs; krow++) {
2561             atmp = PetscAbsScalar(*ba);
2562             ba++;
2563             row = brow + krow; /* local row index */
2564             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2565             if (work[col] < atmp) work[col] = atmp;
2566           }
2567         }
2568         bj++;
2569       }
2570     }
2571 
2572     /* send values to its owners */
2573     for (dest = rank + 1; dest < size; dest++) {
2574       svalues = work + rowners_bs[dest];
2575       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2576       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2577     }
2578   }
2579 
2580   /* receive values */
2581   if (rank) {
2582     rvalues = work;
2583     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2584     for (source = 0; source < rank; source++) {
2585       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2586       /* process values */
2587       for (i = 0; i < count; i++) {
2588         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2589       }
2590     }
2591   }
2592 
2593   PetscCall(VecRestoreArray(v, &va));
2594   PetscCall(PetscFree(work));
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2599 {
2600   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2601   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2602   PetscScalar       *x, *ptr, *from;
2603   Vec                bb1;
2604   const PetscScalar *b;
2605 
2606   PetscFunctionBegin;
2607   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2608   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2609 
2610   if (flag == SOR_APPLY_UPPER) {
2611     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2612     PetscFunctionReturn(0);
2613   }
2614 
2615   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2616     if (flag & SOR_ZERO_INITIAL_GUESS) {
2617       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2618       its--;
2619     }
2620 
2621     PetscCall(VecDuplicate(bb, &bb1));
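    /*
       Added commentary (not part of the original source): each sweep builds bb1 = bb - (off-diagonal part of A)*x
       with a single scatter. slvec0 = [x; B^T*x] is scaled by -1 and scattered with ADD_VALUES into
       slvec1 = [bb; 0], so that afterwards slvec1a holds bb minus the lower-triangular contributions and slvec1b
       holds the ghost values of -x needed for the upper-triangular product below.
    */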
2622     while (its--) {
2623       /* lower triangular part: slvec0b = - B^T*xx */
2624       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2625 
2626       /* copy xx into slvec0a */
2627       PetscCall(VecGetArray(mat->slvec0, &ptr));
2628       PetscCall(VecGetArray(xx, &x));
2629       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2630       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2631 
2632       PetscCall(VecScale(mat->slvec0, -1.0));
2633 
2634       /* copy bb into slvec1a */
2635       PetscCall(VecGetArray(mat->slvec1, &ptr));
2636       PetscCall(VecGetArrayRead(bb, &b));
2637       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2638       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2639 
2640       /* set slvec1b = 0 */
2641       PetscCall(VecSet(mat->slvec1b, 0.0));
2642 
2643       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2644       PetscCall(VecRestoreArray(xx, &x));
2645       PetscCall(VecRestoreArrayRead(bb, &b));
2646       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2647 
2648       /* upper triangular part: bb1 = B*slvec1b + slvec1a, i.e. bb minus the off-diagonal contributions */
2649       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2650 
2651       /* local diagonal sweep */
2652       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2653     }
2654     PetscCall(VecDestroy(&bb1));
2655   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2656     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2657   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2658     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2659   } else if (flag & SOR_EISENSTAT) {
2660     Vec                xx1;
2661     PetscBool          hasop;
2662     const PetscScalar *diag;
2663     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2664     PetscInt           i, n;
2665 
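    /*
       Added commentary (not part of the original source): this branch applies Eisenstat's trick. A local backward
       sweep produces xx, the forward sweep below is applied to bb + scale*diag(A)*x + offdiag(A)*x with
       scale = (omega-2)/omega, and the two results are summed at the end.
    */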
2666     if (!mat->xx1) {
2667       PetscCall(VecDuplicate(bb, &mat->xx1));
2668       PetscCall(VecDuplicate(bb, &mat->bb1));
2669     }
2670     xx1 = mat->xx1;
2671     bb1 = mat->bb1;
2672 
2673     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2674 
2675     if (!mat->diag) {
2676       /* this is wrong for same matrix with new nonzero values */
2677       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2678       PetscCall(MatGetDiagonal(matin, mat->diag));
2679     }
2680     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2681 
2682     if (hasop) {
2683       PetscCall(MatMultDiagonalBlock(matin, xx, mat->slvec1a)); /* slvec1a = diag(A)*x */
2684       PetscCall(VecAYPX(mat->slvec1a, scale, bb));              /* slvec1a = bb + scale*diag(A)*x */
2685     } else {
2686       /*
2687           These two lines are replaced by code that may be a bit faster for a good compiler
2688       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2689       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2690       */
2691       PetscCall(VecGetArray(mat->slvec1a, &sl));
2692       PetscCall(VecGetArrayRead(mat->diag, &diag));
2693       PetscCall(VecGetArrayRead(bb, &b));
2694       PetscCall(VecGetArray(xx, &x));
2695       PetscCall(VecGetLocalSize(xx, &n));
2696       if (omega == 1.0) {
2697         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2698         PetscCall(PetscLogFlops(2.0 * n));
2699       } else {
2700         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2701         PetscCall(PetscLogFlops(3.0 * n));
2702       }
2703       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2704       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2705       PetscCall(VecRestoreArrayRead(bb, &b));
2706       PetscCall(VecRestoreArray(xx, &x));
2707     }
2708 
2709     /* multiply off-diagonal portion of matrix */
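    /* Added commentary (not part of the original source): the same one-scatter trick as in the symmetric-sweep
       branch above, except slvec0a is copied from xx without the -1 scaling, so slvec1a accumulates
       bb + scale*diag(A)*x plus the full off-diagonal product before the local forward sweep. */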
2710     PetscCall(VecSet(mat->slvec1b, 0.0));
2711     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2712     PetscCall(VecGetArray(mat->slvec0, &from));
2713     PetscCall(VecGetArray(xx, &x));
2714     PetscCall(PetscArraycpy(from, x, bs * mbs));
2715     PetscCall(VecRestoreArray(mat->slvec0, &from));
2716     PetscCall(VecRestoreArray(xx, &x));
2717     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2718     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2719     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2720 
2721     /* local sweep */
2722     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2723     PetscCall(VecAXPY(xx, 1.0, xx1));
2724   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 /*@
2729      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
2730          in standard CSR format.
2731 
2732    Collective
2733 
2734    Input Parameters:
2735 +  comm - MPI communicator
2736 .  bs - the block size, only a block size of 1 is supported
2737 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2738 .  n - number of local columns; this should be the same as the local size used in creating the
2739        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it calculated
2740        if N is given). For square matrices n is almost always m.
2741 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2742 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2743 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2744 .   j - column indices
2745 -   a - matrix values
2746 
2747    Output Parameter:
2748 .   mat - the matrix
2749 
2750    Level: intermediate
2751 
2752    Notes:
2753        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2754      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2755      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2756 
2757        The i and j indices are 0 based, and the offsets in i refer to positions in the local j array.
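
   Example usage (an illustrative sketch, not taken from the PETSc examples: a 3x3 symmetric tridiagonal matrix
   with bs = 1 on a single MPI rank, passing only the upper triangle):
.vb
   PetscInt    i[] = {0, 2, 4, 5};
   PetscInt    j[] = {0, 1, 1, 2, 2};
   PetscScalar a[] = {2.0, -1.0, 2.0, -1.0, 2.0}; /* illustrative values */
   Mat         A;

   PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve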
2758 
2759 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2760           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2761 @*/
2762 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2763 {
2764   PetscFunctionBegin;
2765   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2766   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2767   PetscCall(MatCreate(comm, mat));
2768   PetscCall(MatSetSizes(*mat, m, n, M, N));
2769   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2770   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 /*@C
2775    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2776 
2777    Collective
2778 
2779    Input Parameters:
2780 +  B - the matrix
2781 .  bs - the block size
2782 .  i - the indices into j for the start of each local row (starts with zero)
2783 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2784 -  v - optional values in the matrix
2785 
2786    Level: advanced
2787 
2788    Notes:
2789    Though this routine has Preallocation() in its name, it also sets the exact nonzero locations of the matrix entries
2790    and usually the numerical values as well
2791 
2792    Any entries below the diagonal are ignored
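
   For example (an illustrative sketch assuming bs = 1, a single MPI rank owning both rows of a 2x2 matrix, and a
   matrix B whose sizes and `MATMPISBAIJ` type have already been set):
.vb
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 1, 1};
   PetscScalar v[] = {4.0, -1.0, 4.0}; /* illustrative values, upper triangle only */

   PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve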
2793 
2794 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2795 @*/
2796 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2797 {
2798   PetscFunctionBegin;
2799   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2804 {
2805   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2806   PetscInt    *indx;
2807   PetscScalar *values;
2808 
2809   PetscFunctionBegin;
2810   PetscCall(MatGetSize(inmat, &m, &N));
2811   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2812     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2813     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2814     PetscInt     *bindx, rmax = a->rmax, j;
2815     PetscMPIInt   rank, size;
2816 
2817     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2818     mbs = m / bs;
2819     Nbs = N / cbs;
2820     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2821     nbs = n / cbs;
2822 
2823     PetscCall(PetscMalloc1(rmax, &bindx));
2824     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* macro that defines __end and __rstart, which are used below */
2825 
2826     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2827     PetscCallMPI(MPI_Comm_size(comm, &size));
2828     if (rank == size - 1) {
2829       /* Check sum(nbs) = Nbs */
2830       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2831     }
2832 
2833     rstart = __rstart; /* block rstart of *outmat; __rstart is defined by the MatPreallocateBegin() macro */
2834     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
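    /* Added commentary (not part of the original source): MatGetRow_SeqSBAIJ() returns point (unblocked) column
       indices, so each block row is sampled at its first point row and the indices are compressed by bs into bindx
       before MatPreallocateSet() counts them for the diagonal/off-diagonal preallocation of *outmat. */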
2835     for (i = 0; i < mbs; i++) {
2836       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2837       nnz = nnz / bs;
2838       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2839       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2840       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2841     }
2842     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2843     PetscCall(PetscFree(bindx));
2844 
2845     PetscCall(MatCreate(comm, outmat));
2846     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2847     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2848     PetscCall(MatSetType(*outmat, MATSBAIJ));
2849     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2850     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2851     MatPreallocateEnd(dnz, onz);
2852   }
2853 
2854   /* numeric phase */
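  /* Added commentary (not part of the original source): each point row of inmat is copied into *outmat at the
     global row offset rstart; MAT_GETROW_UPPERTRIANGULAR allows MatGetRow_SeqSBAIJ() to return the stored
     upper-triangular entries. */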
2855   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2856   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2857 
2858   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2859   for (i = 0; i < m; i++) {
2860     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2861     Ii = i + rstart;
2862     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2863     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2864   }
2865   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2866   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2867   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2868   PetscFunctionReturn(0);
2869 }
2870