/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928) */

#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) {
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}
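
/*
   A minimal sketch (illustrative only, not part of this file) of the MATPREALLOCATOR
   pattern used above: a first pass records the nonzero pattern, then
   MatPreallocatorPreallocate() sizes the target matrix, and a second identical pass
   fills the values.

     Mat pre;
     MatCreate(comm, &pre);
     MatSetType(pre, MATPREALLOCATOR);
     MatSetSizes(pre, m, n, M, N);
     MatSetUp(pre);
     // ... MatSetValues(pre, ...) with the same pattern later used for B ...
     MatAssemblyBegin(pre, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(pre, MAT_FINAL_ASSEMBLY);
     MatPreallocatorPreallocate(pre, PETSC_TRUE, B);
     MatDestroy(&pre);
*/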

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

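/*
   The two macros below perform the scalar insertion step for the diagonal (A, SeqSBAIJ)
   and off-diagonal (B, SeqBAIJ) local blocks: locate the block column with a short
   binary search followed by a linear scan, update the bs x bs block entry in place,
   and otherwise grow the row (or skip/error, depending on the nonew setting).
*/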
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
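/*
   A minimal usage sketch (illustrative, not part of this file): with SBAIJ only the
   upper triangle is set; here bs = 1 and the (i,j) entry with i <= j is inserted.

     PetscInt    i = 2, j = 5;            // row index <= column index
     PetscScalar val = 4.2;
     MatSetValues(mat, 1, &i, 1, &j, &val, INSERT_VALUES);
     // entries with i > j are silently ignored only when the option
     // MAT_IGNORE_LOWER_TRIANGULAR has been set; otherwise they error
*/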
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) {
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
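
/*
   Hedged usage sketch (illustrative, not part of this file): inserting one 2x2 upper
   triangular block with global block indices via the routine above; by default the
   values are interpreted row oriented.

     PetscInt    ib = 0, jb = 1;            // block row <= block column
     PetscScalar vblk[4] = {1, 2, 3, 4};    // one bs*bs = 2*2 block
     MatSetValuesBlocked(mat, 1, &ib, 1, &jb, vblk, INSERT_VALUES);
*/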

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] now holds the square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] now holds the square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
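      /* Only the upper triangle is stored, so each entry of the off-diagonal block B
         appears twice in the full symmetric matrix; hence the factor 2 on its squared
         Frobenius norm below (the SeqSBAIJ norm of the diagonal block A is assumed to
         already account for the symmetric storage within A) */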
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(0);
}

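/*
   Note on the product routines below: only the upper triangle is stored, so
   y = M*x is assembled from three pieces: the symmetric local diagonal block
   (a->A) applied to x, the off-diagonal block (a->B) applied to the gathered
   ghost values, and the (Hermitian) transpose of a->B applied to local x, whose
   contributions are scattered back to the owning ranks through sMvctx via the
   work vectors slvec0 and slvec1.
*/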
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) {
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) {
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
1375 
1376 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) {
1377   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1378 
1379   PetscFunctionBegin;
1380   PetscCall(MatZeroEntries(l->A));
1381   PetscCall(MatZeroEntries(l->B));
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1386   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1387   Mat            A = a->A, B = a->B;
1388   PetscLogDouble isend[5], irecv[5];
1389 
1390   PetscFunctionBegin;
1391   info->block_size = (PetscReal)matin->rmap->bs;
1392 
1393   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1394 
1395   isend[0] = info->nz_used;
1396   isend[1] = info->nz_allocated;
1397   isend[2] = info->nz_unneeded;
1398   isend[3] = info->memory;
1399   isend[4] = info->mallocs;
1400 
1401   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1402 
1403   isend[0] += info->nz_used;
1404   isend[1] += info->nz_allocated;
1405   isend[2] += info->nz_unneeded;
1406   isend[3] += info->memory;
1407   isend[4] += info->mallocs;
1408   if (flag == MAT_LOCAL) {
1409     info->nz_used      = isend[0];
1410     info->nz_allocated = isend[1];
1411     info->nz_unneeded  = isend[2];
1412     info->memory       = isend[3];
1413     info->mallocs      = isend[4];
1414   } else if (flag == MAT_GLOBAL_MAX) {
1415     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1416 
1417     info->nz_used      = irecv[0];
1418     info->nz_allocated = irecv[1];
1419     info->nz_unneeded  = irecv[2];
1420     info->memory       = irecv[3];
1421     info->mallocs      = irecv[4];
1422   } else if (flag == MAT_GLOBAL_SUM) {
1423     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1424 
1425     info->nz_used      = irecv[0];
1426     info->nz_allocated = irecv[1];
1427     info->nz_unneeded  = irecv[2];
1428     info->memory       = irecv[3];
1429     info->mallocs      = irecv[4];
1430   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1431   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1432   info->fill_ratio_needed = 0;
1433   info->factor_mallocs    = 0;
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) {
1438   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1439   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1440 
1441   PetscFunctionBegin;
1442   switch (op) {
1443   case MAT_NEW_NONZERO_LOCATIONS:
1444   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1445   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1446   case MAT_KEEP_NONZERO_PATTERN:
1447   case MAT_SUBMAT_SINGLEIS:
1448   case MAT_NEW_NONZERO_LOCATION_ERR:
1449     MatCheckPreallocated(A, 1);
1450     PetscCall(MatSetOption(a->A, op, flg));
1451     PetscCall(MatSetOption(a->B, op, flg));
1452     break;
1453   case MAT_ROW_ORIENTED:
1454     MatCheckPreallocated(A, 1);
1455     a->roworiented = flg;
1456 
1457     PetscCall(MatSetOption(a->A, op, flg));
1458     PetscCall(MatSetOption(a->B, op, flg));
1459     break;
1460   case MAT_FORCE_DIAGONAL_ENTRIES:
1461   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
1462   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
1463   case MAT_USE_HASH_TABLE: a->ht_flag = flg; break;
1464   case MAT_HERMITIAN: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg));
1465 #if defined(PETSC_USE_COMPLEX)
1466     if (flg) { /* need different mat-vec ops */
1467       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1468       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1469       A->ops->multtranspose    = NULL;
1470       A->ops->multtransposeadd = NULL;
1471       A->symmetric             = PETSC_BOOL3_FALSE;
1472     }
1473 #endif
1474     break;
1475   case MAT_SPD:
1476   case MAT_SYMMETRIC: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg));
1477 #if defined(PETSC_USE_COMPLEX)
1478     if (flg) { /* restore to use default mat-vec ops */
1479       A->ops->mult             = MatMult_MPISBAIJ;
1480       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1481       A->ops->multtranspose    = MatMult_MPISBAIJ;
1482       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1483     }
1484 #endif
1485     break;
1486   case MAT_STRUCTURALLY_SYMMETRIC:
1487     MatCheckPreallocated(A, 1);
1488     PetscCall(MatSetOption(a->A, op, flg));
1489     break;
1490   case MAT_SYMMETRY_ETERNAL:
1491   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1492     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1493     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1494     break;
1495   case MAT_SPD_ETERNAL: break;
1496   case MAT_IGNORE_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
1497   case MAT_ERROR_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
1498   case MAT_GETROW_UPPERTRIANGULAR: aA->getrow_utriangular = flg; break;
1499   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) {
1505   PetscFunctionBegin;
1506   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1507   if (reuse == MAT_INITIAL_MATRIX) {
1508     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1509   } else if (reuse == MAT_REUSE_MATRIX) {
1510     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) {
1516   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1517   Mat           a = baij->A, b = baij->B;
1518   PetscInt      nv, m, n;
1519   PetscBool     flg;
1520 
1521   PetscFunctionBegin;
1522   if (ll != rr) {
1523     PetscCall(VecEqual(ll, rr, &flg));
1524     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1525   }
1526   if (!ll) PetscFunctionReturn(0);
1527 
1528   PetscCall(MatGetLocalSize(mat, &m, &n));
1529   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1530 
1531   PetscCall(VecGetLocalSize(rr, &nv));
1532   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1533 
1534   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1535 
1536   /* left diagonalscale the off-diagonal part */
1537   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1538 
1539   /* scale the diagonal part */
1540   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1541 
1542   /* right diagonalscale the off-diagonal part */
1543   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1544   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) {
1549   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1550 
1551   PetscFunctionBegin;
1552   PetscCall(MatSetUnfactored(a->A));
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1557 
1558 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) {
1559   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1560   Mat           a, b, c, d;
1561   PetscBool     flg;
1562 
1563   PetscFunctionBegin;
1564   a = matA->A;
1565   b = matA->B;
1566   c = matB->A;
1567   d = matB->B;
1568 
1569   PetscCall(MatEqual(a, c, &flg));
1570   if (flg) PetscCall(MatEqual(b, d, &flg));
1571   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) {
1576   PetscBool isbaij;
1577 
1578   PetscFunctionBegin;
1579   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1580   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1581   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583     PetscCall(MatGetRowUpperTriangular(A));
1584     PetscCall(MatCopy_Basic(A, B, str));
1585     PetscCall(MatRestoreRowUpperTriangular(A));
1586   } else {
1587     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1588     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1589 
1590     PetscCall(MatCopy(a->A, b->A, str));
1591     PetscCall(MatCopy(a->B, b->B, str));
1592   }
1593   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) {
1598   PetscFunctionBegin;
1599   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
1604   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1605   PetscBLASInt  bnz, one                          = 1;
1606   Mat_SeqSBAIJ *xa, *ya;
1607   Mat_SeqBAIJ  *xb, *yb;
1608 
1609   PetscFunctionBegin;
1610   if (str == SAME_NONZERO_PATTERN) {
1611     PetscScalar alpha = a;
1612     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1613     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1614     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1615     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1616     xb = (Mat_SeqBAIJ *)xx->B->data;
1617     yb = (Mat_SeqBAIJ *)yy->B->data;
1618     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1619     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1620     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1621   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of those of Y */
1622     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1623     PetscCall(MatAXPY_Basic(Y, a, X, str));
1624     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1625   } else {
1626     Mat       B;
1627     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1628     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1629     PetscCall(MatGetRowUpperTriangular(X));
1630     PetscCall(MatGetRowUpperTriangular(Y));
1631     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1632     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1633     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1634     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1635     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1636     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1637     PetscCall(MatSetType(B, MATMPISBAIJ));
1638     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1639     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1640     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1641     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1642     PetscCall(MatHeaderMerge(Y, &B));
1643     PetscCall(PetscFree(nnz_d));
1644     PetscCall(PetscFree(nnz_o));
1645     PetscCall(MatRestoreRowUpperTriangular(X));
1646     PetscCall(MatRestoreRowUpperTriangular(Y));
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) {
1652   PetscInt  i;
1653   PetscBool flg;
1654 
1655   PetscFunctionBegin;
1656   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1657   for (i = 0; i < n; i++) {
1658     PetscCall(ISEqual(irow[i], icol[i], &flg));
1659     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) {
1665   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1666   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1667 
1668   PetscFunctionBegin;
1669   if (!Y->preallocated) {
1670     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1671   } else if (!aij->nz) {
1672     PetscInt nonew = aij->nonew;
1673     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1674     aij->nonew = nonew;
1675   }
1676   PetscCall(MatShift_Basic(Y, a));
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
1681   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1682 
1683   PetscFunctionBegin;
1684   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1685   PetscCall(MatMissingDiagonal(a->A, missing, d));
1686   if (d) {
1687     PetscInt rstart;
1688     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1689     *d += rstart / A->rmap->bs;
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) {
1695   PetscFunctionBegin;
1696   *a = ((Mat_MPISBAIJ *)A->data)->A;
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 /* -------------------------------------------------------------------*/
1701 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1702                                        MatGetRow_MPISBAIJ,
1703                                        MatRestoreRow_MPISBAIJ,
1704                                        MatMult_MPISBAIJ,
1705                                        /*  4*/ MatMultAdd_MPISBAIJ,
1706                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1707                                        MatMultAdd_MPISBAIJ,
1708                                        NULL,
1709                                        NULL,
1710                                        NULL,
1711                                        /* 10*/ NULL,
1712                                        NULL,
1713                                        NULL,
1714                                        MatSOR_MPISBAIJ,
1715                                        MatTranspose_MPISBAIJ,
1716                                        /* 15*/ MatGetInfo_MPISBAIJ,
1717                                        MatEqual_MPISBAIJ,
1718                                        MatGetDiagonal_MPISBAIJ,
1719                                        MatDiagonalScale_MPISBAIJ,
1720                                        MatNorm_MPISBAIJ,
1721                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1722                                        MatAssemblyEnd_MPISBAIJ,
1723                                        MatSetOption_MPISBAIJ,
1724                                        MatZeroEntries_MPISBAIJ,
1725                                        /* 24*/ NULL,
1726                                        NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        NULL,
1730                                        /* 29*/ MatSetUp_MPISBAIJ,
1731                                        NULL,
1732                                        NULL,
1733                                        MatGetDiagonalBlock_MPISBAIJ,
1734                                        NULL,
1735                                        /* 34*/ MatDuplicate_MPISBAIJ,
1736                                        NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        NULL,
1740                                        /* 39*/ MatAXPY_MPISBAIJ,
1741                                        MatCreateSubMatrices_MPISBAIJ,
1742                                        MatIncreaseOverlap_MPISBAIJ,
1743                                        MatGetValues_MPISBAIJ,
1744                                        MatCopy_MPISBAIJ,
1745                                        /* 44*/ NULL,
1746                                        MatScale_MPISBAIJ,
1747                                        MatShift_MPISBAIJ,
1748                                        NULL,
1749                                        NULL,
1750                                        /* 49*/ NULL,
1751                                        NULL,
1752                                        NULL,
1753                                        NULL,
1754                                        NULL,
1755                                        /* 54*/ NULL,
1756                                        NULL,
1757                                        MatSetUnfactored_MPISBAIJ,
1758                                        NULL,
1759                                        MatSetValuesBlocked_MPISBAIJ,
1760                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1761                                        NULL,
1762                                        NULL,
1763                                        NULL,
1764                                        NULL,
1765                                        /* 64*/ NULL,
1766                                        NULL,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1771                                        NULL,
1772                                        MatConvert_MPISBAIJ_Basic,
1773                                        NULL,
1774                                        NULL,
1775                                        /* 74*/ NULL,
1776                                        NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        NULL,
1780                                        /* 79*/ NULL,
1781                                        NULL,
1782                                        NULL,
1783                                        NULL,
1784                                        MatLoad_MPISBAIJ,
1785                                        /* 84*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        /* 89*/ NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        /* 94*/ NULL,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        /* 99*/ NULL,
1801                                        NULL,
1802                                        NULL,
1803                                        MatConjugate_MPISBAIJ,
1804                                        NULL,
1805                                        /*104*/ NULL,
1806                                        MatRealPart_MPISBAIJ,
1807                                        MatImaginaryPart_MPISBAIJ,
1808                                        MatGetRowUpperTriangular_MPISBAIJ,
1809                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1810                                        /*109*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        MatMissingDiagonal_MPISBAIJ,
1815                                        /*114*/ NULL,
1816                                        NULL,
1817                                        NULL,
1818                                        NULL,
1819                                        NULL,
1820                                        /*119*/ NULL,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        /*124*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        /*129*/ NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        NULL,
1834                                        NULL,
1835                                        /*134*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        /*139*/ MatSetBlockSizes_Default,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        /*150*/ NULL};
1852 
1853 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
1854   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1855   PetscInt      i, mbs, Mbs;
1856   PetscMPIInt   size;
1857 
1858   PetscFunctionBegin;
1859   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1860   PetscCall(PetscLayoutSetUp(B->rmap));
1861   PetscCall(PetscLayoutSetUp(B->cmap));
1862   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1863   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1864   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1865 
1866   mbs = B->rmap->n / bs;
1867   Mbs = B->rmap->N / bs;
1868   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1869 
1870   B->rmap->bs = bs;
1871   b->bs2      = bs * bs;
1872   b->mbs      = mbs;
1873   b->Mbs      = Mbs;
1874   b->nbs      = B->cmap->n / bs;
1875   b->Nbs      = B->cmap->N / bs;
1876 
1877   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1878   b->rstartbs = B->rmap->rstart / bs;
1879   b->rendbs   = B->rmap->rend / bs;
1880 
1881   b->cstartbs = B->cmap->rstart / bs;
1882   b->cendbs   = B->cmap->rend / bs;
1883 
1884 #if defined(PETSC_USE_CTABLE)
1885   PetscCall(PetscTableDestroy(&b->colmap));
1886 #else
1887   PetscCall(PetscFree(b->colmap));
1888 #endif
1889   PetscCall(PetscFree(b->garray));
1890   PetscCall(VecDestroy(&b->lvec));
1891   PetscCall(VecScatterDestroy(&b->Mvctx));
1892   PetscCall(VecDestroy(&b->slvec0));
1893   PetscCall(VecDestroy(&b->slvec0b));
1894   PetscCall(VecDestroy(&b->slvec1));
1895   PetscCall(VecDestroy(&b->slvec1a));
1896   PetscCall(VecDestroy(&b->slvec1b));
1897   PetscCall(VecScatterDestroy(&b->sMvctx));
1898 
1899   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1900   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1901   PetscCall(MatDestroy(&b->B));
1902   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1903   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1904   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1905   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));
1906 
1907   if (!B->preallocated) {
1908     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1909     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1910     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1911     PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
1912     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1913   }
1914 
1915   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1916   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1917 
1918   B->preallocated  = PETSC_TRUE;
1919   B->was_assembled = PETSC_FALSE;
1920   B->assembled     = PETSC_FALSE;
1921   PetscFunctionReturn(0);
1922 }
1923 
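/* ii[] (length m+1, in block rows) and jj[] (global block column indices) describe the matrix in
   block CSR form; block entries below the diagonal are excluded from the preallocation counts, and
   MAT_IGNORE_LOWER_TRIANGULAR makes the subsequent insertions drop them */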
1924 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
1925   PetscInt        m, rstart, cend;
1926   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1927   const PetscInt *JJ          = NULL;
1928   PetscScalar    *values      = NULL;
1929   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1930   PetscBool       nooffprocentries;
1931 
1932   PetscFunctionBegin;
1933   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1934   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1935   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1936   PetscCall(PetscLayoutSetUp(B->rmap));
1937   PetscCall(PetscLayoutSetUp(B->cmap));
1938   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1939   m      = B->rmap->n / bs;
1940   rstart = B->rmap->rstart / bs;
1941   cend   = B->cmap->rend / bs;
1942 
1943   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
1944   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
1945   for (i = 0; i < m; i++) {
1946     nz = ii[i + 1] - ii[i];
1947     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
1948     /* count the entries on the block diagonal and above, split into diagonal (d) and off-diagonal portions; bd counts the strictly lower block entries, which are not stored */
1949     JJ = jj + ii[i];
1950     bd = 0;
1951     for (j = 0; j < nz; j++) {
1952       if (*JJ >= i + rstart) break;
1953       JJ++;
1954       bd++;
1955     }
1956     d = 0;
1957     for (; j < nz; j++) {
1958       if (*JJ++ >= cend) break;
1959       d++;
1960     }
1961     d_nnz[i] = d;
1962     o_nnz[i] = nz - d - bd;
1963     nz       = nz - bd;
1964     nz_max   = PetscMax(nz_max, nz);
1965   }
1966   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
1967   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
1968   PetscCall(PetscFree2(d_nnz, o_nnz));
1969 
1970   values = (PetscScalar *)V;
1971   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
1972   for (i = 0; i < m; i++) {
1973     PetscInt        row   = i + rstart;
1974     PetscInt        ncols = ii[i + 1] - ii[i];
1975     const PetscInt *icols = jj + ii[i];
1976     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
1977       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
1978       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
1979     } else { /* block ordering does not match so we can only insert one block at a time. */
1980       PetscInt j;
1981       for (j = 0; j < ncols; j++) {
1982         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
1983         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
1984       }
1985     }
1986   }
1987 
1988   if (!V) PetscCall(PetscFree(values));
1989   nooffprocentries    = B->nooffprocentries;
1990   B->nooffprocentries = PETSC_TRUE;
1991   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1992   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1993   B->nooffprocentries = nooffprocentries;
1994 
1995   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 /*MC
2000    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2001    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2002    the matrix is stored.
2003 
2004    For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2005    can call `MatSetOption()` with `MAT_HERMITIAN`.
2006 
2007    Options Database Keys:
2008 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2009 
2010    Note:
2011      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, on each process the
2012      number of rows in the diagonal portion of the matrix must be less than or equal to the number of columns.
2013 
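   Example (an illustrative sketch, not taken from the PETSc examples; M, bs, d_nz and o_nz are assumed set by the caller):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
.ve
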
2014    Level: beginner
2015 
2016 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2017 M*/
2018 
2019 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) {
2020   Mat_MPISBAIJ *b;
2021   PetscBool     flg = PETSC_FALSE;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(PetscNewLog(B, &b));
2025   B->data = (void *)b;
2026   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2027 
2028   B->ops->destroy = MatDestroy_MPISBAIJ;
2029   B->ops->view    = MatView_MPISBAIJ;
2030   B->assembled    = PETSC_FALSE;
2031   B->insertmode   = NOT_SET_VALUES;
2032 
2033   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2034   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2035 
2036   /* build local table of row and column ownerships */
2037   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2038 
2039   /* build cache for off array entries formed */
2040   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2041 
2042   b->donotstash  = PETSC_FALSE;
2043   b->colmap      = NULL;
2044   b->garray      = NULL;
2045   b->roworiented = PETSC_TRUE;
2046 
2047   /* stuff used in block assembly */
2048   b->barray = NULL;
2049 
2050   /* stuff used for matrix vector multiply */
2051   b->lvec    = NULL;
2052   b->Mvctx   = NULL;
2053   b->slvec0  = NULL;
2054   b->slvec0b = NULL;
2055   b->slvec1  = NULL;
2056   b->slvec1a = NULL;
2057   b->slvec1b = NULL;
2058   b->sMvctx  = NULL;
2059 
2060   /* stuff for MatGetRow() */
2061   b->rowindices   = NULL;
2062   b->rowvalues    = NULL;
2063   b->getrowactive = PETSC_FALSE;
2064 
2065   /* hash table stuff */
2066   b->ht           = NULL;
2067   b->hd           = NULL;
2068   b->ht_size      = 0;
2069   b->ht_flag      = PETSC_FALSE;
2070   b->ht_fact      = 0;
2071   b->ht_total_ct  = 0;
2072   b->ht_insert_ct = 0;
2073 
2074   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2075   b->ijonly = PETSC_FALSE;
2076 
2077   b->in_loc = NULL;
2078   b->v_loc  = NULL;
2079   b->n_loc  = 0;
2080 
2081   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2082   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2083   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2084   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2085 #if defined(PETSC_HAVE_ELEMENTAL)
2086   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2087 #endif
2088 #if defined(PETSC_HAVE_SCALAPACK)
2089   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2090 #endif
2091   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2092   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2093 
2094   B->symmetric                   = PETSC_BOOL3_TRUE;
2095   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2096   B->symmetry_eternal            = PETSC_TRUE;
2097   B->structural_symmetry_eternal = PETSC_TRUE;
2098 #if defined(PETSC_USE_COMPLEX)
2099   B->hermitian = PETSC_BOOL3_FALSE;
2100 #else
2101   B->hermitian = PETSC_BOOL3_TRUE;
2102 #endif
2103 
2104   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2105   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2106   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2107   if (flg) {
2108     PetscReal fact = 1.39;
2109     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2110     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2111     if (fact <= 1.0) fact = 1.39;
2112     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2113     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2114   }
2115   PetscOptionsEnd();
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 /*MC
2120    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2121 
2122    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2123    and `MATMPISBAIJ` otherwise.
2124 
2125    Options Database Key:
2126 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to `MatSetFromOptions()`
2127 
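   Example (a minimal sketch of selecting the type from the options database; M is assumed given):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M);
   MatSetFromOptions(A); /* run with -mat_type sbaij */
.ve
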
2128   Level: beginner
2129 
2130 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2131 M*/
2132 
2133 /*@C
2134    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2135    the user should preallocate the matrix storage by setting the parameters
2136    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2137    performance can be increased by more than a factor of 50.
2138 
2139    Collective on B
2140 
2141    Input Parameters:
2142 +  B - the matrix
2143 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2144           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2145 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2146            submatrix  (same for all local rows)
2147 .  d_nnz - array containing the number of block nonzeros in the various block rows
2148            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2149            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2150            for the diagonal entry and set a value even if it is zero.
2151 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2152            submatrix (same for all local rows).
2153 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2154            off-diagonal portion of the local submatrix that is right of the diagonal
2155            (possibly different for each block row) or NULL.
2156 
2157    Options Database Keys:
2158 +   -mat_no_unroll - uses code that does not unroll the loops in the
2159                      block calculations (much slower)
2160 -   -mat_block_size - size of the blocks to use
2161 
2162    Notes:
2163 
2164    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2165    then it must be used on all processors that share the object for that argument.
2166 
2167    If the *_nnz parameter is given then the *_nz parameter is ignored.
2168 
2169    Storage Information:
2170    For a square global matrix we define each processor's diagonal portion
2171    to be its local rows and the corresponding columns (a square submatrix);
2172    each processor's off-diagonal portion encompasses the remainder of the
2173    local matrix (a rectangular submatrix).
2174 
2175    The user can specify preallocated storage for the diagonal part of
2176    the local submatrix with either d_nz or d_nnz (not both).  Set
2177    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2178    memory allocation.  Likewise, specify preallocated storage for the
2179    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2180 
2181    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2182    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2183    You can also run with the option -info and look for messages with the string
2184    malloc in them to see if additional memory allocation was needed.
2185 
2186    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2187    the figure below we depict these three local rows and all columns (0-11).
2188 
2189 .vb
2190            0 1 2 3 4 5 6 7 8 9 10 11
2191           --------------------------
2192    row 3  |. . . d d d o o o o  o  o
2193    row 4  |. . . d d d o o o o  o  o
2194    row 5  |. . . d d d o o o o  o  o
2195           --------------------------
2196 .ve
2197 
2198    Thus, any entries in the d locations are stored in the d (diagonal)
2199    submatrix, and any entries in the o locations are stored in the
2200    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2201    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2202 
2203    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2204    plus the diagonal part of the d matrix,
2205    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2206 
2207    In general, for PDE problems in which most nonzeros are near the diagonal,
2208    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2209    or you will get TERRIBLE performance; see the users' manual chapter on
2210    matrices.
2211 
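   As an illustrative sketch (assuming bs = 1 and that every d and o location in the figure above is nonzero),
   the process owning rows 3, 4 and 5 could preallocate with
.vb
   PetscInt d_nnz[] = {3, 2, 1}; /* upper triangular counts: columns {3,4,5}, {4,5}, {5} */
   PetscInt o_nnz[] = {6, 6, 6}; /* columns 6 through 11 in each of the three rows */
   MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve
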
2212    Level: intermediate
2213 
2214 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2215 @*/
2216 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
2217   PetscFunctionBegin;
2218   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2219   PetscValidType(B, 1);
2220   PetscValidLogicalCollectiveInt(B, bs, 2);
2221   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 /*@C
2226    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2227    (block compressed row).  For good matrix assembly performance
2228    the user should preallocate the matrix storage by setting the parameters
2229    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2230    performance can be increased by more than a factor of 50.
2231 
2232    Collective
2233 
2234    Input Parameters:
2235 +  comm - MPI communicator
2236 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2237           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2238 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2239            This value should be the same as the local size used in creating the
2240            y vector for the matrix-vector product y = Ax.
2241 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2242            This value should be the same as the local size used in creating the
2243            x vector for the matrix-vector product y = Ax.
2244 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2245 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2246 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2247            submatrix  (same for all local rows)
2248 .  d_nnz - array containing the number of block nonzeros in the various block rows
2249            in the upper triangular portion of the diagonal portion of the local submatrix
2250            (possibly different for each block row) or NULL.
2251            If you plan to factor the matrix you must leave room for the diagonal entry and
2252            set its value even if it is zero.
2253 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2254            submatrix (same for all local rows).
2255 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2256            off-diagonal portion of the local submatrix (possibly different for
2257            each block row) or NULL.
2258 
2259    Output Parameter:
2260 .  A - the matrix
2261 
2262    Options Database Keys:
2263 +   -mat_no_unroll - uses code that does not unroll the loops in the
2264                      block calculations (much slower)
2265 .   -mat_block_size - size of the blocks to use
2266 -   -mat_mpi - use the parallel matrix data structures even on one processor
2267                (defaults to using SeqBAIJ format on one processor)
2268 
2269    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2270    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2271    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2272 
2273    Notes:
2274    The number of rows and columns must be divisible by blocksize.
2275    This matrix type does not support complex Hermitian operations.
2276 
2277    The user MUST specify either the local or global matrix dimensions
2278    (possibly both).
2279 
2280    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2281    then it must be used on all processors that share the object for that argument.
2282 
2283    If the *_nnz parameter is given then the *_nz parameter is ignored.
2284 
2285    Storage Information:
2286    For a square global matrix we define each processor's diagonal portion
2287    to be its local rows and the corresponding columns (a square submatrix);
2288    each processor's off-diagonal portion encompasses the remainder of the
2289    local matrix (a rectangular submatrix).
2290 
2291    The user can specify preallocated storage for the diagonal part of
2292    the local submatrix with either d_nz or d_nnz (not both).  Set
2293    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2294    memory allocation.  Likewise, specify preallocated storage for the
2295    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2296 
2297    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2298    the figure below we depict these three local rows and all columns (0-11).
2299 
2300 .vb
2301            0 1 2 3 4 5 6 7 8 9 10 11
2302           --------------------------
2303    row 3  |. . . d d d o o o o  o  o
2304    row 4  |. . . d d d o o o o  o  o
2305    row 5  |. . . d d d o o o o  o  o
2306           --------------------------
2307 .ve
2308 
2309    Thus, any entries in the d locations are stored in the d (diagonal)
2310    submatrix, and any entries in the o locations are stored in the
2311    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2312    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2313 
2314    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2315    plus the diagonal part of the d matrix,
2316    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2317    In general, for PDE problems in which most nonzeros are near the diagonal,
2318    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2319    or you will get TERRIBLE performance; see the users' manual chapter on
2320    matrices.
2321 
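   Example (a minimal sketch; m and bs are assumed set by the caller, with m divisible by bs):
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD, bs, m, m, PETSC_DETERMINE, PETSC_DETERMINE, 5, NULL, 2, NULL, &A);
   /* set values with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() */
   MatDestroy(&A);
.ve
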
2322    Level: intermediate
2323 
2324 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2325 @*/
2326 
2327 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
2328   PetscMPIInt size;
2329 
2330   PetscFunctionBegin;
2331   PetscCall(MatCreate(comm, A));
2332   PetscCall(MatSetSizes(*A, m, n, M, N));
2333   PetscCallMPI(MPI_Comm_size(comm, &size));
2334   if (size > 1) {
2335     PetscCall(MatSetType(*A, MATMPISBAIJ));
2336     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2337   } else {
2338     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2339     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2340   }
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
2345   Mat           mat;
2346   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2347   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2348   PetscScalar  *array;
2349 
2350   PetscFunctionBegin;
2351   *newmat = NULL;
2352 
2353   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2354   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2355   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2356   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2357   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2358 
2359   mat->factortype   = matin->factortype;
2360   mat->preallocated = PETSC_TRUE;
2361   mat->assembled    = PETSC_TRUE;
2362   mat->insertmode   = NOT_SET_VALUES;
2363 
2364   a      = (Mat_MPISBAIJ *)mat->data;
2365   a->bs2 = oldmat->bs2;
2366   a->mbs = oldmat->mbs;
2367   a->nbs = oldmat->nbs;
2368   a->Mbs = oldmat->Mbs;
2369   a->Nbs = oldmat->Nbs;
2370 
2371   a->size         = oldmat->size;
2372   a->rank         = oldmat->rank;
2373   a->donotstash   = oldmat->donotstash;
2374   a->roworiented  = oldmat->roworiented;
2375   a->rowindices   = NULL;
2376   a->rowvalues    = NULL;
2377   a->getrowactive = PETSC_FALSE;
2378   a->barray       = NULL;
2379   a->rstartbs     = oldmat->rstartbs;
2380   a->rendbs       = oldmat->rendbs;
2381   a->cstartbs     = oldmat->cstartbs;
2382   a->cendbs       = oldmat->cendbs;
2383 
2384   /* hash table stuff */
2385   a->ht           = NULL;
2386   a->hd           = NULL;
2387   a->ht_size      = 0;
2388   a->ht_flag      = oldmat->ht_flag;
2389   a->ht_fact      = oldmat->ht_fact;
2390   a->ht_total_ct  = 0;
2391   a->ht_insert_ct = 0;
2392 
2393   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2394   if (oldmat->colmap) {
2395 #if defined(PETSC_USE_CTABLE)
2396     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2397 #else
2398     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2399     PetscCall(PetscLogObjectMemory((PetscObject)mat, (a->Nbs) * sizeof(PetscInt)));
2400     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2401 #endif
2402   } else a->colmap = NULL;
2403 
2404   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2405     PetscCall(PetscMalloc1(len, &a->garray));
2406     PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
2407     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2408   } else a->garray = NULL;
2409 
2410   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2411   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2412   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
2413   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2414   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx));
2415 
2416   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2417   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0));
2418   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2419   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1));
2420 
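  /* slvec1a/slvec1b (and slvec0b) are windows into the storage of slvec1 (and slvec0): the first
     bs*mbs entries form the local part, the remaining nt - bs*mbs entries the ghost part */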
2421   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2422   PetscCall(VecGetArray(a->slvec1, &array));
2423   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2424   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2425   PetscCall(VecRestoreArray(a->slvec1, &array));
2426   PetscCall(VecGetArray(a->slvec0, &array));
2427   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2428   PetscCall(VecRestoreArray(a->slvec0, &array));
2429   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0));
2430   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1));
2431   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0b));
2432   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1a));
2433   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1b));
2434 
2435   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead use the lazy trick of sharing the scatter through a reference: */
2436   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2437   a->sMvctx = oldmat->sMvctx;
2438   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->sMvctx));
2439 
2440   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2441   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
2442   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2443   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B));
2444   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2445   *newmat = mat;
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2450 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2451 
2452 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) {
2453   PetscBool isbinary;
2454 
2455   PetscFunctionBegin;
2456   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2457   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2458   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) {
2463   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2464   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2465   PetscReal     atmp;
2466   PetscReal    *work, *svalues, *rvalues;
2467   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2468   PetscMPIInt   rank, size;
2469   PetscInt     *rowners_bs, dest, count, source;
2470   PetscScalar  *va;
2471   MatScalar    *ba;
2472   MPI_Status    stat;
2473 
2474   PetscFunctionBegin;
2475   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning the indices (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
2476   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2477   PetscCall(VecGetArray(v, &va));
2478 
2479   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2480   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2481 
2482   bs  = A->rmap->bs;
2483   mbs = a->mbs;
2484   Mbs = a->Mbs;
2485   ba  = b->a;
2486   bi  = b->i;
2487   bj  = b->j;
2488 
2489   /* find ownerships */
2490   rowners_bs = A->rmap->range;
2491 
2492   /* each proc creates an array to be distributed */
2493   PetscCall(PetscCalloc1(bs * Mbs, &work));
2494 
2495   /* row_max for B */
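  /* because only the upper triangle is stored, each entry of the local off-diagonal block B also
     contributes, by symmetry, to the row whose global index equals its column index; those rows are
     owned by higher-rank processes, so the contributions are accumulated in work[] and sent below */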
2496   if (rank != size - 1) {
2497     for (i = 0; i < mbs; i++) {
2498       ncols = bi[1] - bi[0];
2499       bi++;
2500       brow = bs * i;
2501       for (j = 0; j < ncols; j++) {
2502         bcol = bs * (*bj);
2503         for (kcol = 0; kcol < bs; kcol++) {
2504           col = bcol + kcol;           /* local col index */
2505           col += rowners_bs[rank + 1]; /* global col index */
2506           for (krow = 0; krow < bs; krow++) {
2507             atmp = PetscAbsScalar(*ba);
2508             ba++;
2509             row = brow + krow; /* local row index */
2510             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2511             if (work[col] < atmp) work[col] = atmp;
2512           }
2513         }
2514         bj++;
2515       }
2516     }
2517 
2518     /* send values to its owners */
2519     for (dest = rank + 1; dest < size; dest++) {
2520       svalues = work + rowners_bs[dest];
2521       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2522       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2523     }
2524   }
2525 
2526   /* receive values */
2527   if (rank) {
2528     rvalues = work;
2529     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2530     for (source = 0; source < rank; source++) {
2531       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2532       /* process values */
2533       for (i = 0; i < count; i++) {
2534         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2535       }
2536     }
2537   }
2538 
2539   PetscCall(VecRestoreArray(v, &va));
2540   PetscCall(PetscFree(work));
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
2545   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2546   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2547   PetscScalar       *x, *ptr, *from;
2548   Vec                bb1;
2549   const PetscScalar *b;
2550 
2551   PetscFunctionBegin;
2552   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2553   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2554 
2555   if (flag == SOR_APPLY_UPPER) {
2556     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2557     PetscFunctionReturn(0);
2558   }
2559 
2560   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2561     if (flag & SOR_ZERO_INITIAL_GUESS) {
2562       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2563       its--;
2564     }
2565 
2566     PetscCall(VecDuplicate(bb, &bb1));
2567     while (its--) {
2568       /* lower triangular part: slvec0b = - B^T*xx */
2569       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2570 
2571       /* copy xx into slvec0a */
2572       PetscCall(VecGetArray(mat->slvec0, &ptr));
2573       PetscCall(VecGetArray(xx, &x));
2574       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2575       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2576 
2577       PetscCall(VecScale(mat->slvec0, -1.0));
2578 
2579       /* copy bb into slvec1a */
2580       PetscCall(VecGetArray(mat->slvec1, &ptr));
2581       PetscCall(VecGetArrayRead(bb, &b));
2582       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2583       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2584 
2585       /* set slvec1b = 0 */
2586       PetscCall(VecSet(mat->slvec1b, 0.0));
2587 
2588       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2589       PetscCall(VecRestoreArray(xx, &x));
2590       PetscCall(VecRestoreArrayRead(bb, &b));
2591       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2592 
2593       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - (off-diagonal part of A)*xx */
2594       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2595 
2596       /* local diagonal sweep */
2597       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2598     }
2599     PetscCall(VecDestroy(&bb1));
2600   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2601     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2602   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2603     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2604   } else if (flag & SOR_EISENSTAT) {
2605     Vec                xx1;
2606     PetscBool          hasop;
2607     const PetscScalar *diag;
2608     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2609     PetscInt           i, n;
2610 
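    /*
       Eisenstat's trick: applies the SSOR preconditioner using one backward and one forward
       triangular sweep, reusing intermediate products; the intermediate right-hand side
       b - ((2-omega)/omega)*D*x is formed between the two sweeps (hence scale = (omega-2)/omega).
    */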
2611     if (!mat->xx1) {
2612       PetscCall(VecDuplicate(bb, &mat->xx1));
2613       PetscCall(VecDuplicate(bb, &mat->bb1));
2614     }
2615     xx1 = mat->xx1;
2616     bb1 = mat->bb1;
2617 
2618     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2619 
2620     if (!mat->diag) {
2621       /* note: this cached diagonal becomes stale if the matrix is reused with new numerical values */
2622       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2623       PetscCall(MatGetDiagonal(matin, mat->diag));
2624     }
2625     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2626 
2627     if (hasop) {
2628       PetscCall(MatMultDiagonalBlock(matin, xx, mat->slvec1a)); /* slvec1a = D*xx; the result must land in slvec1a, which the off-diagonal multadd below consumes */
2629       PetscCall(VecAYPX(mat->slvec1a, scale, bb));              /* slvec1a = bb + scale*D*xx, matching the hand-rolled branch below */
2630     } else {
2631       /*
2632           These two lines are replaced by code that may be a bit faster for a good compiler
2633       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2634       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2635       */
2636       PetscCall(VecGetArray(mat->slvec1a, &sl));
2637       PetscCall(VecGetArrayRead(mat->diag, &diag));
2638       PetscCall(VecGetArrayRead(bb, &b));
2639       PetscCall(VecGetArray(xx, &x));
2640       PetscCall(VecGetLocalSize(xx, &n));
2641       if (omega == 1.0) {
2642         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2643         PetscCall(PetscLogFlops(2.0 * n));
2644       } else {
2645         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2646         PetscCall(PetscLogFlops(3.0 * n));
2647       }
2648       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2649       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2650       PetscCall(VecRestoreArrayRead(bb, &b));
2651       PetscCall(VecRestoreArray(xx, &x));
2652     }
2653 
2654     /* multiply off-diagonal portion of matrix */
2655     PetscCall(VecSet(mat->slvec1b, 0.0));
2656     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2657     PetscCall(VecGetArray(mat->slvec0, &from));
2658     PetscCall(VecGetArray(xx, &x));
2659     PetscCall(PetscArraycpy(from, x, bs * mbs));
2660     PetscCall(VecRestoreArray(mat->slvec0, &from));
2661     PetscCall(VecRestoreArray(xx, &x));
2662     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2663     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2664     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2665 
2666     /* local sweep */
2667     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2668     PetscCall(VecAXPY(xx, 1.0, xx1));
2669   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2670   PetscFunctionReturn(0);
2671 }
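
/*
   A minimal sketch of reaching the SOR kernel above through the public interface
   (assumes a previously assembled MPISBAIJ matrix A with block size 1 and conforming
   vectors bb and xx; the names are illustrative):

     PetscCall(MatSOR(A, bb, 1.0, (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS), 0.0, 1, 1, xx));
*/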
2672 
2673 /*@
2674      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
2675          in standard CSR format.
2676 
2677    Collective
2678 
2679    Input Parameters:
2680 +  comm - MPI communicator
2681 .  bs - the block size; only a block size of 1 is supported
2682 .  m - number of local rows (cannot be `PETSC_DECIDE`)
2683 .  n - number of local columns; this should be the same as the local size used in creating the
2684        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2685        calculated if N is given). For square matrices n is almost always m.
2686 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2687 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2688 .  i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block entries in block row row-1 of the matrix
2689 .  j - column indices
2690 -  a - matrix values
2691 
2692    Output Parameter:
2693 .  mat - the matrix
2694 
2695    Level: intermediate
2696 
2697    Notes:
2698        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2699      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2700      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2701 
2702        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
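
       For example, the 2x2 symmetric matrix [4 1; 1 3] with bs = 1 on a single process is
     described by its upper triangle only (a sketch; the values are illustrative):
.vb
     PetscInt    i[] = {0, 2, 3};         /* row 0 stores 2 entries, row 1 stores 1 */
     PetscInt    j[] = {0, 1, 1};         /* columns of the upper-triangular entries */
     PetscScalar a[] = {4.0, 1.0, 3.0};
     Mat         A;
     PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve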
2703 
2704 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2705           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2706 @*/
2707 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
2708   PetscFunctionBegin;
2709   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2710   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
2711   PetscCall(MatCreate(comm, mat));
2712   PetscCall(MatSetSizes(*mat, m, n, M, N));
2713   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2714   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 /*@C
2719    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2720 
2721    Collective
2722 
2723    Input Parameters:
2724 +  B - the matrix
2725 .  bs - the block size
2726 .  i - the indices into j for the start of each local row (starts with zero)
2727 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2728 -  v - optional values in the matrix
2729 
2730    Level: advanced
2731 
2732    Notes:
2733    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries,
2734    and usually the numerical values as well.
2735 
2736    Any entries below the diagonal are ignored.
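
   Example (a sketch for bs = 1 on a single process; sizes and values are illustrative):
.vb
   Mat         B;
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 1, 1};
   PetscScalar v[] = {4.0, 1.0, 3.0};
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve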
2737 
2738 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2739 @*/
2740 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
2741   PetscFunctionBegin;
2742   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2743   PetscFunctionReturn(0);
2744 }
2745 
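/*
   Concatenates the per-rank sequential SBAIJ matrices into one parallel MPISBAIJ matrix:
   a symbolic phase (MAT_INITIAL_MATRIX) counts the block nonzeros of each local row to
   preallocate the result, and a numeric phase copies the upper-triangular rows into place
   with MatSetValues(). Rank r contributes rows [rstart, rstart + m) of the result.
*/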
2746 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
2747   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2748   PetscInt    *indx;
2749   PetscScalar *values;
2750 
2751   PetscFunctionBegin;
2752   PetscCall(MatGetSize(inmat, &m, &N));
2753   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2754     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2755     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2756     PetscInt     *bindx, rmax = a->rmax, j;
2757     PetscMPIInt   rank, size;
2758 
2759     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2760     mbs = m / bs;
2761     Nbs = N / cbs;
2762     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2763     nbs = n / cbs;
2764 
2765     PetscCall(PetscMalloc1(rmax, &bindx));
2766     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline macro; its outputs __end and __rstart are used below */
2767 
2768     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2769     PetscCallMPI(MPI_Comm_size(comm, &size));
2770     if (rank == size - 1) {
2771       /* Check sum(nbs) = Nbs */
2772       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2773     }
2774 
2775     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2776     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2777     for (i = 0; i < mbs; i++) {
2778       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2779       nnz = nnz / bs;
2780       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2781       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2782       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2783     }
2784     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2785     PetscCall(PetscFree(bindx));
2786 
2787     PetscCall(MatCreate(comm, outmat));
2788     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2789     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2790     PetscCall(MatSetType(*outmat, MATSBAIJ));
2791     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2792     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2793     MatPreallocateEnd(dnz, onz);
2794   }
2795 
2796   /* numeric phase */
2797   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2798   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2799 
2800   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2801   for (i = 0; i < m; i++) {
2802     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2803     Ii = i + rstart;
2804     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2805     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2806   }
2807   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2808   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2809   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2810   PetscFunctionReturn(0);
2811 }
2812