xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b758ae8c44070759d1facc3d2663bdd721e4de8f)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237     A->nonzerostate++; \
238   a_noinsert:; \
239     ailen[brow] = nrow; \
240   } while (0)
241 
242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
243   do { \
244     brow = row / bs; \
245     rp   = bj + bi[brow]; \
246     ap   = ba + bs2 * bi[brow]; \
247     rmax = bimax[brow]; \
248     nrow = bilen[brow]; \
249     bcol = col / bs; \
250     ridx = row % bs; \
251     cidx = col % bs; \
252     low  = 0; \
253     high = nrow; \
254     while (high - low > 3) { \
255       t = (low + high) / 2; \
256       if (rp[t] > bcol) high = t; \
257       else low = t; \
258     } \
259     for (_i = low; _i < high; _i++) { \
260       if (rp[_i] > bcol) break; \
261       if (rp[_i] == bcol) { \
262         bap = ap + bs2 * _i + bs * cidx + ridx; \
263         if (addv == ADD_VALUES) *bap += value; \
264         else *bap = value; \
265         goto b_noinsert; \
266       } \
267     } \
268     if (b->nonew == 1) goto b_noinsert; \
269     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
270     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
271     N = nrow++ - 1; \
272     /* shift up all the later entries in this row */ \
273     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
274     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
275     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
276     rp[_i]                          = bcol; \
277     ap[bs2 * _i + bs * cidx + ridx] = value; \
278     B->nonzerostate++; \
279   b_noinsert:; \
280     bilen[brow] = nrow; \
281   } while (0)
282 
283 /* Only add/insert a(i,j) with i<=j (blocks).
284    Any a(i,j) with i>j input by user is ignored or generates an error
285 */
286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
287 {
288   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
289   MatScalar     value;
290   PetscBool     roworiented = baij->roworiented;
291   PetscInt      i, j, row, col;
292   PetscInt      rstart_orig = mat->rmap->rstart;
293   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
294   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
295 
296   /* Some Variables required in the macro */
297   Mat           A     = baij->A;
298   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
299   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
300   MatScalar    *aa = a->a;
301 
302   Mat          B     = baij->B;
303   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
304   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
305   MatScalar   *ba = b->a;
306 
307   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
308   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
309   MatScalar *ap, *bap;
310 
311   /* for stash */
312   PetscInt   n_loc, *in_loc = NULL;
313   MatScalar *v_loc = NULL;
314 
315   PetscFunctionBegin;
316   if (!baij->donotstash) {
317     if (n > baij->n_loc) {
318       PetscCall(PetscFree(baij->in_loc));
319       PetscCall(PetscFree(baij->v_loc));
320       PetscCall(PetscMalloc1(n, &baij->in_loc));
321       PetscCall(PetscMalloc1(n, &baij->v_loc));
322 
323       baij->n_loc = n;
324     }
325     in_loc = baij->in_loc;
326     v_loc  = baij->v_loc;
327   }
328 
329   for (i = 0; i < m; i++) {
330     if (im[i] < 0) continue;
331     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
332     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
333       row = im[i] - rstart_orig;                     /* local row index */
334       for (j = 0; j < n; j++) {
335         if (im[i] / bs > in[j] / bs) {
336           if (a->ignore_ltriangular) {
337             continue; /* ignore lower triangular blocks */
338           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
339         }
340         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
341           col  = in[j] - cstart_orig;                    /* local col index */
342           brow = row / bs;
343           bcol = col / bs;
344           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
345           if (roworiented) value = v[i * n + j];
346           else value = v[i + j * m];
347           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
348           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
349         } else if (in[j] < 0) {
350           continue;
351         } else {
352           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
353           /* off-diag entry (B) */
354           if (mat->was_assembled) {
355             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
356 #if defined(PETSC_USE_CTABLE)
357             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
358             col = col - 1;
359 #else
360             col = baij->colmap[in[j] / bs] - 1;
361 #endif
362             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
363               PetscCall(MatDisAssemble_MPISBAIJ(mat));
364               col = in[j];
365               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
366               B     = baij->B;
367               b     = (Mat_SeqBAIJ *)(B)->data;
368               bimax = b->imax;
369               bi    = b->i;
370               bilen = b->ilen;
371               bj    = b->j;
372               ba    = b->a;
373             } else col += in[j] % bs;
374           } else col = in[j];
375           if (roworiented) value = v[i * n + j];
376           else value = v[i + j * m];
377           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
378           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
379         }
380       }
381     } else { /* off processor entry */
382       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
383       if (!baij->donotstash) {
384         mat->assembled = PETSC_FALSE;
385         n_loc          = 0;
386         for (j = 0; j < n; j++) {
387           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
388           in_loc[n_loc] = in[j];
389           if (roworiented) {
390             v_loc[n_loc] = v[i * n + j];
391           } else {
392             v_loc[n_loc] = v[j * m + i];
393           }
394           n_loc++;
395         }
396         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
404 {
405   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
406   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
407   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
408   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
409   PetscBool          roworiented = a->roworiented;
410   const PetscScalar *value       = v;
411   MatScalar         *ap, *aa = a->a, *bap;
412 
413   PetscFunctionBegin;
414   if (col < row) {
415     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
416     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
417   }
418   rp    = aj + ai[row];
419   ap    = aa + bs2 * ai[row];
420   rmax  = imax[row];
421   nrow  = ailen[row];
422   value = v;
423   low   = 0;
424   high  = nrow;
425 
426   while (high - low > 7) {
427     t = (low + high) / 2;
428     if (rp[t] > col) high = t;
429     else low = t;
430   }
431   for (i = low; i < high; i++) {
432     if (rp[i] > col) break;
433     if (rp[i] == col) {
434       bap = ap + bs2 * i;
435       if (roworiented) {
436         if (is == ADD_VALUES) {
437           for (ii = 0; ii < bs; ii++) {
438             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
439           }
440         } else {
441           for (ii = 0; ii < bs; ii++) {
442             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
443           }
444         }
445       } else {
446         if (is == ADD_VALUES) {
447           for (ii = 0; ii < bs; ii++) {
448             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
449           }
450         } else {
451           for (ii = 0; ii < bs; ii++) {
452             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
453           }
454         }
455       }
456       goto noinsert2;
457     }
458   }
459   if (nonew == 1) goto noinsert2;
460   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
461   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
462   N = nrow++ - 1;
463   high++;
464   /* shift up all the later entries in this row */
465   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
466   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
467   rp[i] = col;
468   bap   = ap + bs2 * i;
469   if (roworiented) {
470     for (ii = 0; ii < bs; ii++) {
471       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
472     }
473   } else {
474     for (ii = 0; ii < bs; ii++) {
475       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
476     }
477   }
478 noinsert2:;
479   ailen[row] = nrow;
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 
483 /*
484    This routine is exactly duplicated in mpibaij.c
485 */
486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
487 {
488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
489   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
490   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
491   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
492   PetscBool          roworiented = a->roworiented;
493   const PetscScalar *value       = v;
494   MatScalar         *ap, *aa = a->a, *bap;
495 
496   PetscFunctionBegin;
497   rp    = aj + ai[row];
498   ap    = aa + bs2 * ai[row];
499   rmax  = imax[row];
500   nrow  = ailen[row];
501   low   = 0;
502   high  = nrow;
503   value = v;
504   while (high - low > 7) {
505     t = (low + high) / 2;
506     if (rp[t] > col) high = t;
507     else low = t;
508   }
509   for (i = low; i < high; i++) {
510     if (rp[i] > col) break;
511     if (rp[i] == col) {
512       bap = ap + bs2 * i;
513       if (roworiented) {
514         if (is == ADD_VALUES) {
515           for (ii = 0; ii < bs; ii++) {
516             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
517           }
518         } else {
519           for (ii = 0; ii < bs; ii++) {
520             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
521           }
522         }
523       } else {
524         if (is == ADD_VALUES) {
525           for (ii = 0; ii < bs; ii++, value += bs) {
526             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
527             bap += bs;
528           }
529         } else {
530           for (ii = 0; ii < bs; ii++, value += bs) {
531             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
532             bap += bs;
533           }
534         }
535       }
536       goto noinsert2;
537     }
538   }
539   if (nonew == 1) goto noinsert2;
540   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
541   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
542   N = nrow++ - 1;
543   high++;
544   /* shift up all the later entries in this row */
545   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
546   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
547   rp[i] = col;
548   bap   = ap + bs2 * i;
549   if (roworiented) {
550     for (ii = 0; ii < bs; ii++) {
551       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
552     }
553   } else {
554     for (ii = 0; ii < bs; ii++) {
555       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
556     }
557   }
558 noinsert2:;
559   ailen[row] = nrow;
560   PetscFunctionReturn(PETSC_SUCCESS);
561 }
562 
563 /*
564     This routine could be optimized by removing the need for the block copy below and passing stride information
565   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
566 */
567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
568 {
569   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
570   const MatScalar *value;
571   MatScalar       *barray      = baij->barray;
572   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
573   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
574   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
575   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
576 
577   PetscFunctionBegin;
578   if (!barray) {
579     PetscCall(PetscMalloc1(bs2, &barray));
580     baij->barray = barray;
581   }
582 
583   if (roworiented) {
584     stepval = (n - 1) * bs;
585   } else {
586     stepval = (m - 1) * bs;
587   }
588   for (i = 0; i < m; i++) {
589     if (im[i] < 0) continue;
590     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
591     if (im[i] >= rstart && im[i] < rend) {
592       row = im[i] - rstart;
593       for (j = 0; j < n; j++) {
594         if (im[i] > in[j]) {
595           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
596           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
597         }
598         /* If NumCol = 1 then a copy is not required */
599         if ((roworiented) && (n == 1)) {
600           barray = (MatScalar *)v + i * bs2;
601         } else if ((!roworiented) && (m == 1)) {
602           barray = (MatScalar *)v + j * bs2;
603         } else { /* Here a copy is required */
604           if (roworiented) {
605             value = v + i * (stepval + bs) * bs + j * bs;
606           } else {
607             value = v + j * (stepval + bs) * bs + i * bs;
608           }
609           for (ii = 0; ii < bs; ii++, value += stepval) {
610             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
611           }
612           barray -= bs2;
613         }
614 
615         if (in[j] >= cstart && in[j] < cend) {
616           col = in[j] - cstart;
617           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
618         } else if (in[j] < 0) {
619           continue;
620         } else {
621           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
622           if (mat->was_assembled) {
623             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624 
625 #if defined(PETSC_USE_CTABLE)
626             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
627             col = col < 1 ? -1 : (col - 1) / bs;
628 #else
629             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
630 #endif
631             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
632               PetscCall(MatDisAssemble_MPISBAIJ(mat));
633               col = in[j];
634             }
635           } else col = in[j];
636           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
637         }
638       }
639     } else {
640       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
641       if (!baij->donotstash) {
642         if (roworiented) {
643           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
644         } else {
645           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
646         }
647       }
648     }
649   }
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
653 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
654 {
655   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
656   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
657   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
658 
659   PetscFunctionBegin;
660   for (i = 0; i < m; i++) {
661     if (idxm[i] < 0) continue; /* negative row */
662     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
663     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
664       row = idxm[i] - bsrstart;
665       for (j = 0; j < n; j++) {
666         if (idxn[j] < 0) continue; /* negative column */
667         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
668         if (idxn[j] >= bscstart && idxn[j] < bscend) {
669           col = idxn[j] - bscstart;
670           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
671         } else {
672           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
673 #if defined(PETSC_USE_CTABLE)
674           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
675           data--;
676 #else
677           data = baij->colmap[idxn[j] / bs] - 1;
678 #endif
679           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
680           else {
681             col = data + idxn[j] % bs;
682             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
683           }
684         }
685       }
686     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
687   }
688   PetscFunctionReturn(PETSC_SUCCESS);
689 }
690 
691 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
692 {
693   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
694   PetscReal     sum[2], *lnorm2;
695 
696   PetscFunctionBegin;
697   if (baij->size == 1) {
698     PetscCall(MatNorm(baij->A, type, norm));
699   } else {
700     if (type == NORM_FROBENIUS) {
701       PetscCall(PetscMalloc1(2, &lnorm2));
702       PetscCall(MatNorm(baij->A, type, lnorm2));
703       *lnorm2 = (*lnorm2) * (*lnorm2);
704       lnorm2++; /* squar power of norm(A) */
705       PetscCall(MatNorm(baij->B, type, lnorm2));
706       *lnorm2 = (*lnorm2) * (*lnorm2);
707       lnorm2--; /* squar power of norm(B) */
708       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
709       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
710       PetscCall(PetscFree(lnorm2));
711     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
712       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
713       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
714       PetscReal    *rsum, *rsum2, vabs;
715       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
716       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
717       MatScalar    *v;
718 
719       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
720       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
721       /* Amat */
722       v  = amat->a;
723       jj = amat->j;
724       for (brow = 0; brow < mbs; brow++) {
725         grow = bs * (rstart + brow);
726         nz   = amat->i[brow + 1] - amat->i[brow];
727         for (bcol = 0; bcol < nz; bcol++) {
728           gcol = bs * (rstart + *jj);
729           jj++;
730           for (col = 0; col < bs; col++) {
731             for (row = 0; row < bs; row++) {
732               vabs = PetscAbsScalar(*v);
733               v++;
734               rsum[gcol + col] += vabs;
735               /* non-diagonal block */
736               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
737             }
738           }
739         }
740         PetscCall(PetscLogFlops(nz * bs * bs));
741       }
742       /* Bmat */
743       v  = bmat->a;
744       jj = bmat->j;
745       for (brow = 0; brow < mbs; brow++) {
746         grow = bs * (rstart + brow);
747         nz   = bmat->i[brow + 1] - bmat->i[brow];
748         for (bcol = 0; bcol < nz; bcol++) {
749           gcol = bs * garray[*jj];
750           jj++;
751           for (col = 0; col < bs; col++) {
752             for (row = 0; row < bs; row++) {
753               vabs = PetscAbsScalar(*v);
754               v++;
755               rsum[gcol + col] += vabs;
756               rsum[grow + row] += vabs;
757             }
758           }
759         }
760         PetscCall(PetscLogFlops(nz * bs * bs));
761       }
762       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
763       *norm = 0.0;
764       for (col = 0; col < mat->cmap->N; col++) {
765         if (rsum2[col] > *norm) *norm = rsum2[col];
766       }
767       PetscCall(PetscFree2(rsum, rsum2));
768     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
769   }
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
774 {
775   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
776   PetscInt      nstash, reallocs;
777 
778   PetscFunctionBegin;
779   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
780 
781   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
782   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
783   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
784   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
785   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
786   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
791 {
792   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
793   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
794   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
795   PetscInt     *row, *col;
796   PetscBool     other_disassembled;
797   PetscMPIInt   n;
798   PetscBool     r1, r2, r3;
799   MatScalar    *val;
800 
801   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
802   PetscFunctionBegin;
803   if (!baij->donotstash && !mat->nooffprocentries) {
804     while (1) {
805       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
806       if (!flg) break;
807 
808       for (i = 0; i < n;) {
809         /* Now identify the consecutive vals belonging to the same row */
810         for (j = i, rstart = row[j]; j < n; j++) {
811           if (row[j] != rstart) break;
812         }
813         if (j < n) ncols = j - i;
814         else ncols = n - i;
815         /* Now assemble all these values with a single function call */
816         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
817         i = j;
818       }
819     }
820     PetscCall(MatStashScatterEnd_Private(&mat->stash));
821     /* Now process the block-stash. Since the values are stashed column-oriented,
822        set the row-oriented flag to column-oriented, and after MatSetValues()
823        restore the original flags */
824     r1 = baij->roworiented;
825     r2 = a->roworiented;
826     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
827 
828     baij->roworiented = PETSC_FALSE;
829     a->roworiented    = PETSC_FALSE;
830 
831     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
832     while (1) {
833       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
834       if (!flg) break;
835 
836       for (i = 0; i < n;) {
837         /* Now identify the consecutive vals belonging to the same row */
838         for (j = i, rstart = row[j]; j < n; j++) {
839           if (row[j] != rstart) break;
840         }
841         if (j < n) ncols = j - i;
842         else ncols = n - i;
843         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
844         i = j;
845       }
846     }
847     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
848 
849     baij->roworiented = r1;
850     a->roworiented    = r2;
851 
852     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
853   }
854 
855   PetscCall(MatAssemblyBegin(baij->A, mode));
856   PetscCall(MatAssemblyEnd(baij->A, mode));
857 
858   /* determine if any processor has disassembled, if so we must
859      also disassemble ourselves, in order that we may reassemble. */
860   /*
861      if nonzero structure of submatrix B cannot change then we know that
862      no processor disassembled thus we can skip this stuff
863   */
864   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
865     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
866     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
867   }
868 
869   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
870   PetscCall(MatAssemblyBegin(baij->B, mode));
871   PetscCall(MatAssemblyEnd(baij->B, mode));
872 
873   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
874 
875   baij->rowvalues = NULL;
876 
877   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
878   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
879     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
880     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
881   }
882   PetscFunctionReturn(PETSC_SUCCESS);
883 }
884 
885 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
886 #include <petscdraw.h>
887 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
888 {
889   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
890   PetscInt          bs   = mat->rmap->bs;
891   PetscMPIInt       rank = baij->rank;
892   PetscBool         iascii, isdraw;
893   PetscViewer       sviewer;
894   PetscViewerFormat format;
895 
896   PetscFunctionBegin;
897   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
898   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
899   if (iascii) {
900     PetscCall(PetscViewerGetFormat(viewer, &format));
901     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
902       MatInfo info;
903       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
904       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
905       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
906       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
907                                                    mat->rmap->bs, (double)info.memory));
908       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
909       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
910       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
911       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
912       PetscCall(PetscViewerFlush(viewer));
913       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
914       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
915       PetscCall(VecScatterView(baij->Mvctx, viewer));
916       PetscFunctionReturn(PETSC_SUCCESS);
917     } else if (format == PETSC_VIEWER_ASCII_INFO) {
918       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
919       PetscFunctionReturn(PETSC_SUCCESS);
920     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
921       PetscFunctionReturn(PETSC_SUCCESS);
922     }
923   }
924 
925   if (isdraw) {
926     PetscDraw draw;
927     PetscBool isnull;
928     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
929     PetscCall(PetscDrawIsNull(draw, &isnull));
930     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
931   }
932 
933   {
934     /* assemble the entire matrix onto first processor. */
935     Mat           A;
936     Mat_SeqSBAIJ *Aloc;
937     Mat_SeqBAIJ  *Bloc;
938     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
939     MatScalar    *a;
940     const char   *matname;
941 
942     /* Should this be the same type as mat? */
943     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
944     if (rank == 0) {
945       PetscCall(MatSetSizes(A, M, N, M, N));
946     } else {
947       PetscCall(MatSetSizes(A, 0, 0, M, N));
948     }
949     PetscCall(MatSetType(A, MATMPISBAIJ));
950     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
951     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
955     ai   = Aloc->i;
956     aj   = Aloc->j;
957     a    = Aloc->a;
958     PetscCall(PetscMalloc1(bs, &rvals));
959 
960     for (i = 0; i < mbs; i++) {
961       rvals[0] = bs * (baij->rstartbs + i);
962       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
963       for (j = ai[i]; j < ai[i + 1]; j++) {
964         col = (baij->cstartbs + aj[j]) * bs;
965         for (k = 0; k < bs; k++) {
966           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
967           col++;
968           a += bs;
969         }
970       }
971     }
972     /* copy over the B part */
973     Bloc = (Mat_SeqBAIJ *)baij->B->data;
974     ai   = Bloc->i;
975     aj   = Bloc->j;
976     a    = Bloc->a;
977     for (i = 0; i < mbs; i++) {
978       rvals[0] = bs * (baij->rstartbs + i);
979       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
980       for (j = ai[i]; j < ai[i + 1]; j++) {
981         col = baij->garray[aj[j]] * bs;
982         for (k = 0; k < bs; k++) {
983           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
984           col++;
985           a += bs;
986         }
987       }
988     }
989     PetscCall(PetscFree(rvals));
990     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
991     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
992     /*
993        Everyone has to call to draw the matrix since the graphics waits are
994        synchronized across all processors that share the PetscDraw object
995     */
996     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
997     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
998     if (rank == 0) {
999       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
1000       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
1001     }
1002     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1003     PetscCall(MatDestroy(&A));
1004   }
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1009 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1010 
1011 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1012 {
1013   PetscBool iascii, isdraw, issocket, isbinary;
1014 
1015   PetscFunctionBegin;
1016   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1017   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1020   if (iascii || isdraw || issocket) {
1021     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1022   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1023   PetscFunctionReturn(PETSC_SUCCESS);
1024 }
1025 
1026 #if defined(PETSC_USE_COMPLEX)
1027 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1028 {
1029   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1030   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1031   PetscScalar       *from;
1032   const PetscScalar *x;
1033 
1034   PetscFunctionBegin;
1035   /* diagonal part */
1036   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1037   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1038   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1039   PetscCall(VecZeroEntries(a->slvec1b));
1040 
1041   /* subdiagonal part */
1042   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1043   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1044 
1045   /* copy x into the vec slvec0 */
1046   PetscCall(VecGetArray(a->slvec0, &from));
1047   PetscCall(VecGetArrayRead(xx, &x));
1048 
1049   PetscCall(PetscArraycpy(from, x, bs * mbs));
1050   PetscCall(VecRestoreArray(a->slvec0, &from));
1051   PetscCall(VecRestoreArrayRead(xx, &x));
1052 
1053   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1054   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1055   /* supperdiagonal part */
1056   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1057   PetscFunctionReturn(PETSC_SUCCESS);
1058 }
1059 #endif
1060 
1061 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1062 {
1063   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1064   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1065   PetscScalar       *from;
1066   const PetscScalar *x;
1067 
1068   PetscFunctionBegin;
1069   /* diagonal part */
1070   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1071   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1072   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1073   PetscCall(VecZeroEntries(a->slvec1b));
1074 
1075   /* subdiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1077 
1078   /* copy x into the vec slvec0 */
1079   PetscCall(VecGetArray(a->slvec0, &from));
1080   PetscCall(VecGetArrayRead(xx, &x));
1081 
1082   PetscCall(PetscArraycpy(from, x, bs * mbs));
1083   PetscCall(VecRestoreArray(a->slvec0, &from));
1084   PetscCall(VecRestoreArrayRead(xx, &x));
1085 
1086   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1087   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1088   /* supperdiagonal part */
1089   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1090   PetscFunctionReturn(PETSC_SUCCESS);
1091 }
1092 
1093 #if PetscDefined(USE_COMPLEX)
1094 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1095 {
1096   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1097   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1098   PetscScalar       *from;
1099   const PetscScalar *x;
1100 
1101   PetscFunctionBegin;
1102   /* diagonal part */
1103   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1104   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1105   PetscCall(VecZeroEntries(a->slvec1b));
1106 
1107   /* subdiagonal part */
1108   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1109   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1110 
1111   /* copy x into the vec slvec0 */
1112   PetscCall(VecGetArray(a->slvec0, &from));
1113   PetscCall(VecGetArrayRead(xx, &x));
1114   PetscCall(PetscArraycpy(from, x, bs * mbs));
1115   PetscCall(VecRestoreArray(a->slvec0, &from));
1116 
1117   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1118   PetscCall(VecRestoreArrayRead(xx, &x));
1119   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1120 
1121   /* supperdiagonal part */
1122   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1123   PetscFunctionReturn(PETSC_SUCCESS);
1124 }
1125 #endif
1126 
1127 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1128 {
1129   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1130   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1131   PetscScalar       *from;
1132   const PetscScalar *x;
1133 
1134   PetscFunctionBegin;
1135   /* diagonal part */
1136   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1137   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1138   PetscCall(VecZeroEntries(a->slvec1b));
1139 
1140   /* subdiagonal part */
1141   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1142 
1143   /* copy x into the vec slvec0 */
1144   PetscCall(VecGetArray(a->slvec0, &from));
1145   PetscCall(VecGetArrayRead(xx, &x));
1146   PetscCall(PetscArraycpy(from, x, bs * mbs));
1147   PetscCall(VecRestoreArray(a->slvec0, &from));
1148 
1149   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1150   PetscCall(VecRestoreArrayRead(xx, &x));
1151   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1152 
1153   /* supperdiagonal part */
1154   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1155   PetscFunctionReturn(PETSC_SUCCESS);
1156 }
1157 
1158 /*
1159   This only works correctly for square matrices where the subblock A->A is the
1160    diagonal block
1161 */
1162 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1163 {
1164   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1165 
1166   PetscFunctionBegin;
1167   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1168   PetscCall(MatGetDiagonal(a->A, v));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1173 {
1174   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatScale(a->A, aa));
1178   PetscCall(MatScale(a->B, aa));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1183 {
1184   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1185   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1186   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1187   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1188   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1189 
1190   PetscFunctionBegin;
1191   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1192   mat->getrowactive = PETSC_TRUE;
1193 
1194   if (!mat->rowvalues && (idx || v)) {
1195     /*
1196         allocate enough space to hold information from the longest row.
1197     */
1198     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1199     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1200     PetscInt      max = 1, mbs = mat->mbs, tmp;
1201     for (i = 0; i < mbs; i++) {
1202       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1203       if (max < tmp) max = tmp;
1204     }
1205     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1206   }
1207 
1208   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1209   lrow = row - brstart; /* local row index */
1210 
1211   pvA = &vworkA;
1212   pcA = &cworkA;
1213   pvB = &vworkB;
1214   pcB = &cworkB;
1215   if (!v) {
1216     pvA = NULL;
1217     pvB = NULL;
1218   }
1219   if (!idx) {
1220     pcA = NULL;
1221     if (!v) pcB = NULL;
1222   }
1223   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1224   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1225   nztot = nzA + nzB;
1226 
1227   cmap = mat->garray;
1228   if (v || idx) {
1229     if (nztot) {
1230       /* Sort by increasing column numbers, assuming A and B already sorted */
1231       PetscInt imark = -1;
1232       if (v) {
1233         *v = v_p = mat->rowvalues;
1234         for (i = 0; i < nzB; i++) {
1235           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1236           else break;
1237         }
1238         imark = i;
1239         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1240         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1241       }
1242       if (idx) {
1243         *idx = idx_p = mat->rowindices;
1244         if (imark > -1) {
1245           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1246         } else {
1247           for (i = 0; i < nzB; i++) {
1248             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1249             else break;
1250           }
1251           imark = i;
1252         }
1253         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1254         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1255       }
1256     } else {
1257       if (idx) *idx = NULL;
1258       if (v) *v = NULL;
1259     }
1260   }
1261   *nz = nztot;
1262   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1263   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1264   PetscFunctionReturn(PETSC_SUCCESS);
1265 }
1266 
1267 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1268 {
1269   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1270 
1271   PetscFunctionBegin;
1272   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1273   baij->getrowactive = PETSC_FALSE;
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1278 {
1279   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1280   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1281 
1282   PetscFunctionBegin;
1283   aA->getrow_utriangular = PETSC_TRUE;
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1287 {
1288   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1289   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1290 
1291   PetscFunctionBegin;
1292   aA->getrow_utriangular = PETSC_FALSE;
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1297 {
1298   PetscFunctionBegin;
1299   if (PetscDefined(USE_COMPLEX)) {
1300     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1301 
1302     PetscCall(MatConjugate(a->A));
1303     PetscCall(MatConjugate(a->B));
1304   }
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307 
1308 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1309 {
1310   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1311 
1312   PetscFunctionBegin;
1313   PetscCall(MatRealPart(a->A));
1314   PetscCall(MatRealPart(a->B));
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1319 {
1320   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatImaginaryPart(a->A));
1324   PetscCall(MatImaginaryPart(a->B));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1329    Input: isrow       - distributed(parallel),
1330           iscol_local - locally owned (seq)
1331 */
1332 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1333 {
1334   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1335   const PetscInt *ptr1, *ptr2;
1336 
1337   PetscFunctionBegin;
1338   *flg = PETSC_FALSE;
1339   PetscCall(ISGetLocalSize(isrow, &sz1));
1340   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1341   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1342 
1343   PetscCall(ISGetIndices(isrow, &ptr1));
1344   PetscCall(ISGetIndices(iscol_local, &ptr2));
1345 
1346   PetscCall(PetscMalloc1(sz1, &a1));
1347   PetscCall(PetscMalloc1(sz2, &a2));
1348   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1349   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1350   PetscCall(PetscSortInt(sz1, a1));
1351   PetscCall(PetscSortInt(sz2, a2));
1352 
1353   nmatch = 0;
1354   k      = 0;
1355   for (i = 0; i < sz1; i++) {
1356     for (j = k; j < sz2; j++) {
1357       if (a1[i] == a2[j]) {
1358         k = j;
1359         nmatch++;
1360         break;
1361       }
1362     }
1363   }
1364   PetscCall(ISRestoreIndices(isrow, &ptr1));
1365   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1366   PetscCall(PetscFree(a1));
1367   PetscCall(PetscFree(a2));
1368   if (nmatch < sz1) {
1369     *flg = PETSC_FALSE;
1370   } else {
1371     *flg = PETSC_TRUE;
1372   }
1373   PetscFunctionReturn(PETSC_SUCCESS);
1374 }
1375 
1376 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1377 {
1378   Mat       C[2];
1379   IS        iscol_local, isrow_local;
1380   PetscInt  csize, csize_local, rsize;
1381   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1382 
1383   PetscFunctionBegin;
1384   PetscCall(ISGetLocalSize(iscol, &csize));
1385   PetscCall(ISGetLocalSize(isrow, &rsize));
1386   if (call == MAT_REUSE_MATRIX) {
1387     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1388     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1389   } else {
1390     PetscCall(ISAllGather(iscol, &iscol_local));
1391     PetscCall(ISSorted(iscol_local, &issorted));
1392     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1393   }
1394   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1395   if (!isequal) {
1396     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1397     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1398     if (!isidentity) {
1399       if (call == MAT_REUSE_MATRIX) {
1400         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1401         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1402       } else {
1403         PetscCall(ISAllGather(isrow, &isrow_local));
1404         PetscCall(ISSorted(isrow_local, &issorted));
1405         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1406       }
1407     }
1408   }
1409   /* now call MatCreateSubMatrix_MPIBAIJ() */
1410   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1411   if (!isequal && !isidentity) {
1412     if (call == MAT_INITIAL_MATRIX) {
1413       IS       intersect;
1414       PetscInt ni;
1415 
1416       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1417       PetscCall(ISGetLocalSize(intersect, &ni));
1418       PetscCall(ISDestroy(&intersect));
1419       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1420     }
1421     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1422     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1423     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1424     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1425     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1426     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1427     PetscCall(MatDestroy(C));
1428     PetscCall(MatDestroy(C + 1));
1429   }
1430   if (call == MAT_INITIAL_MATRIX) {
1431     if (!isequal && !isidentity) {
1432       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1433       PetscCall(ISDestroy(&isrow_local));
1434     }
1435     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1436     PetscCall(ISDestroy(&iscol_local));
1437   }
1438   PetscFunctionReturn(PETSC_SUCCESS);
1439 }
1440 
1441 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1442 {
1443   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1444 
1445   PetscFunctionBegin;
1446   PetscCall(MatZeroEntries(l->A));
1447   PetscCall(MatZeroEntries(l->B));
1448   PetscFunctionReturn(PETSC_SUCCESS);
1449 }
1450 
1451 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1452 {
1453   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1454   Mat            A = a->A, B = a->B;
1455   PetscLogDouble isend[5], irecv[5];
1456 
1457   PetscFunctionBegin;
1458   info->block_size = (PetscReal)matin->rmap->bs;
1459 
1460   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1461 
1462   isend[0] = info->nz_used;
1463   isend[1] = info->nz_allocated;
1464   isend[2] = info->nz_unneeded;
1465   isend[3] = info->memory;
1466   isend[4] = info->mallocs;
1467 
1468   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1469 
1470   isend[0] += info->nz_used;
1471   isend[1] += info->nz_allocated;
1472   isend[2] += info->nz_unneeded;
1473   isend[3] += info->memory;
1474   isend[4] += info->mallocs;
1475   if (flag == MAT_LOCAL) {
1476     info->nz_used      = isend[0];
1477     info->nz_allocated = isend[1];
1478     info->nz_unneeded  = isend[2];
1479     info->memory       = isend[3];
1480     info->mallocs      = isend[4];
1481   } else if (flag == MAT_GLOBAL_MAX) {
1482     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1483 
1484     info->nz_used      = irecv[0];
1485     info->nz_allocated = irecv[1];
1486     info->nz_unneeded  = irecv[2];
1487     info->memory       = irecv[3];
1488     info->mallocs      = irecv[4];
1489   } else if (flag == MAT_GLOBAL_SUM) {
1490     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1491 
1492     info->nz_used      = irecv[0];
1493     info->nz_allocated = irecv[1];
1494     info->nz_unneeded  = irecv[2];
1495     info->memory       = irecv[3];
1496     info->mallocs      = irecv[4];
1497   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1498   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1499   info->fill_ratio_needed = 0;
1500   info->factor_mallocs    = 0;
1501   PetscFunctionReturn(PETSC_SUCCESS);
1502 }
1503 
1504 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1505 {
1506   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1507   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1508 
1509   PetscFunctionBegin;
1510   switch (op) {
1511   case MAT_NEW_NONZERO_LOCATIONS:
1512   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1513   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1514   case MAT_KEEP_NONZERO_PATTERN:
1515   case MAT_SUBMAT_SINGLEIS:
1516   case MAT_NEW_NONZERO_LOCATION_ERR:
1517     MatCheckPreallocated(A, 1);
1518     PetscCall(MatSetOption(a->A, op, flg));
1519     PetscCall(MatSetOption(a->B, op, flg));
1520     break;
1521   case MAT_ROW_ORIENTED:
1522     MatCheckPreallocated(A, 1);
1523     a->roworiented = flg;
1524 
1525     PetscCall(MatSetOption(a->A, op, flg));
1526     PetscCall(MatSetOption(a->B, op, flg));
1527     break;
1528   case MAT_FORCE_DIAGONAL_ENTRIES:
1529   case MAT_SORTED_FULL:
1530     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1531     break;
1532   case MAT_IGNORE_OFF_PROC_ENTRIES:
1533     a->donotstash = flg;
1534     break;
1535   case MAT_USE_HASH_TABLE:
1536     a->ht_flag = flg;
1537     break;
1538   case MAT_HERMITIAN:
1539     MatCheckPreallocated(A, 1);
1540     PetscCall(MatSetOption(a->A, op, flg));
1541 #if defined(PETSC_USE_COMPLEX)
1542     if (flg) { /* need different mat-vec ops */
1543       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1544       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1545       A->ops->multtranspose    = NULL;
1546       A->ops->multtransposeadd = NULL;
1547       A->symmetric             = PETSC_BOOL3_FALSE;
1548     }
1549 #endif
1550     break;
1551   case MAT_SPD:
1552   case MAT_SYMMETRIC:
1553     MatCheckPreallocated(A, 1);
1554     PetscCall(MatSetOption(a->A, op, flg));
1555 #if defined(PETSC_USE_COMPLEX)
1556     if (flg) { /* restore to use default mat-vec ops */
1557       A->ops->mult             = MatMult_MPISBAIJ;
1558       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1559       A->ops->multtranspose    = MatMult_MPISBAIJ;
1560       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1561     }
1562 #endif
1563     break;
1564   case MAT_STRUCTURALLY_SYMMETRIC:
1565     MatCheckPreallocated(A, 1);
1566     PetscCall(MatSetOption(a->A, op, flg));
1567     break;
1568   case MAT_SYMMETRY_ETERNAL:
1569   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1570     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1571     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1572     break;
1573   case MAT_SPD_ETERNAL:
1574     break;
1575   case MAT_IGNORE_LOWER_TRIANGULAR:
1576     aA->ignore_ltriangular = flg;
1577     break;
1578   case MAT_ERROR_LOWER_TRIANGULAR:
1579     aA->ignore_ltriangular = flg;
1580     break;
1581   case MAT_GETROW_UPPERTRIANGULAR:
1582     aA->getrow_utriangular = flg;
1583     break;
1584   default:
1585     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1591 {
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1594   if (reuse == MAT_INITIAL_MATRIX) {
1595     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1596   } else if (reuse == MAT_REUSE_MATRIX) {
1597     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1598   }
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1603 {
1604   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1605   Mat           a = baij->A, b = baij->B;
1606   PetscInt      nv, m, n;
1607   PetscBool     flg;
1608 
1609   PetscFunctionBegin;
1610   if (ll != rr) {
1611     PetscCall(VecEqual(ll, rr, &flg));
1612     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1613   }
1614   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1615 
1616   PetscCall(MatGetLocalSize(mat, &m, &n));
1617   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1618 
1619   PetscCall(VecGetLocalSize(rr, &nv));
1620   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1621 
1622   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1623 
1624   /* left diagonalscale the off-diagonal part */
1625   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1626 
1627   /* scale the diagonal part */
1628   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1629 
1630   /* right diagonalscale the off-diagonal part */
1631   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1632   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1633   PetscFunctionReturn(PETSC_SUCCESS);
1634 }
1635 
1636 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1637 {
1638   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1639 
1640   PetscFunctionBegin;
1641   PetscCall(MatSetUnfactored(a->A));
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1646 
1647 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1648 {
1649   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1650   Mat           a, b, c, d;
1651   PetscBool     flg;
1652 
1653   PetscFunctionBegin;
1654   a = matA->A;
1655   b = matA->B;
1656   c = matB->A;
1657   d = matB->B;
1658 
1659   PetscCall(MatEqual(a, c, &flg));
1660   if (flg) PetscCall(MatEqual(b, d, &flg));
1661   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1666 {
1667   PetscBool isbaij;
1668 
1669   PetscFunctionBegin;
1670   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1671   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1672   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1673   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1674     PetscCall(MatGetRowUpperTriangular(A));
1675     PetscCall(MatCopy_Basic(A, B, str));
1676     PetscCall(MatRestoreRowUpperTriangular(A));
1677   } else {
1678     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1679     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1680 
1681     PetscCall(MatCopy(a->A, b->A, str));
1682     PetscCall(MatCopy(a->B, b->B, str));
1683   }
1684   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1685   PetscFunctionReturn(PETSC_SUCCESS);
1686 }
1687 
1688 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1689 {
1690   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1691   PetscBLASInt  bnz, one                          = 1;
1692   Mat_SeqSBAIJ *xa, *ya;
1693   Mat_SeqBAIJ  *xb, *yb;
1694 
1695   PetscFunctionBegin;
1696   if (str == SAME_NONZERO_PATTERN) {
1697     PetscScalar alpha = a;
1698     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1699     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1700     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1701     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1702     xb = (Mat_SeqBAIJ *)xx->B->data;
1703     yb = (Mat_SeqBAIJ *)yy->B->data;
1704     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1705     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1706     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1707   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1708     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1709     PetscCall(MatAXPY_Basic(Y, a, X, str));
1710     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1711   } else {
1712     Mat       B;
1713     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1714     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1715     PetscCall(MatGetRowUpperTriangular(X));
1716     PetscCall(MatGetRowUpperTriangular(Y));
1717     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1718     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1719     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1720     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1721     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1722     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1723     PetscCall(MatSetType(B, MATMPISBAIJ));
1724     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1725     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1726     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1727     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1728     PetscCall(MatHeaderMerge(Y, &B));
1729     PetscCall(PetscFree(nnz_d));
1730     PetscCall(PetscFree(nnz_o));
1731     PetscCall(MatRestoreRowUpperTriangular(X));
1732     PetscCall(MatRestoreRowUpperTriangular(Y));
1733   }
1734   PetscFunctionReturn(PETSC_SUCCESS);
1735 }
1736 
1737 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1738 {
1739   PetscInt  i;
1740   PetscBool flg;
1741 
1742   PetscFunctionBegin;
1743   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1744   for (i = 0; i < n; i++) {
1745     PetscCall(ISEqual(irow[i], icol[i], &flg));
1746     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1747   }
1748   PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750 
1751 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1752 {
1753   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1754   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1755 
1756   PetscFunctionBegin;
1757   if (!Y->preallocated) {
1758     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1759   } else if (!aij->nz) {
1760     PetscInt nonew = aij->nonew;
1761     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1762     aij->nonew = nonew;
1763   }
1764   PetscCall(MatShift_Basic(Y, a));
1765   PetscFunctionReturn(PETSC_SUCCESS);
1766 }
1767 
1768 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1769 {
1770   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1771 
1772   PetscFunctionBegin;
1773   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1774   PetscCall(MatMissingDiagonal(a->A, missing, d));
1775   if (d) {
1776     PetscInt rstart;
1777     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1778     *d += rstart / A->rmap->bs;
1779   }
1780   PetscFunctionReturn(PETSC_SUCCESS);
1781 }
1782 
1783 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1784 {
1785   PetscFunctionBegin;
1786   *a = ((Mat_MPISBAIJ *)A->data)->A;
1787   PetscFunctionReturn(PETSC_SUCCESS);
1788 }
1789 
1790 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1791 {
1792   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1796   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1797   PetscFunctionReturn(PETSC_SUCCESS);
1798 }
1799 
1800 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1801 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1802 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1803 
1804 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1805                                        MatGetRow_MPISBAIJ,
1806                                        MatRestoreRow_MPISBAIJ,
1807                                        MatMult_MPISBAIJ,
1808                                        /*  4*/ MatMultAdd_MPISBAIJ,
1809                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1810                                        MatMultAdd_MPISBAIJ,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        /* 10*/ NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        MatSOR_MPISBAIJ,
1818                                        MatTranspose_MPISBAIJ,
1819                                        /* 15*/ MatGetInfo_MPISBAIJ,
1820                                        MatEqual_MPISBAIJ,
1821                                        MatGetDiagonal_MPISBAIJ,
1822                                        MatDiagonalScale_MPISBAIJ,
1823                                        MatNorm_MPISBAIJ,
1824                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1825                                        MatAssemblyEnd_MPISBAIJ,
1826                                        MatSetOption_MPISBAIJ,
1827                                        MatZeroEntries_MPISBAIJ,
1828                                        /* 24*/ NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        /* 29*/ MatSetUp_MPI_Hash,
1834                                        NULL,
1835                                        NULL,
1836                                        MatGetDiagonalBlock_MPISBAIJ,
1837                                        NULL,
1838                                        /* 34*/ MatDuplicate_MPISBAIJ,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        /* 39*/ MatAXPY_MPISBAIJ,
1844                                        MatCreateSubMatrices_MPISBAIJ,
1845                                        MatIncreaseOverlap_MPISBAIJ,
1846                                        MatGetValues_MPISBAIJ,
1847                                        MatCopy_MPISBAIJ,
1848                                        /* 44*/ NULL,
1849                                        MatScale_MPISBAIJ,
1850                                        MatShift_MPISBAIJ,
1851                                        NULL,
1852                                        NULL,
1853                                        /* 49*/ NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        /* 54*/ NULL,
1859                                        NULL,
1860                                        MatSetUnfactored_MPISBAIJ,
1861                                        NULL,
1862                                        MatSetValuesBlocked_MPISBAIJ,
1863                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        /* 64*/ NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1874                                        NULL,
1875                                        MatConvert_MPISBAIJ_Basic,
1876                                        NULL,
1877                                        NULL,
1878                                        /* 74*/ NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        /* 79*/ NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        MatLoad_MPISBAIJ,
1888                                        /* 84*/ NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        /* 89*/ NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        /* 94*/ NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        /* 99*/ NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        MatConjugate_MPISBAIJ,
1907                                        NULL,
1908                                        /*104*/ NULL,
1909                                        MatRealPart_MPISBAIJ,
1910                                        MatImaginaryPart_MPISBAIJ,
1911                                        MatGetRowUpperTriangular_MPISBAIJ,
1912                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1913                                        /*109*/ NULL,
1914                                        NULL,
1915                                        NULL,
1916                                        NULL,
1917                                        MatMissingDiagonal_MPISBAIJ,
1918                                        /*114*/ NULL,
1919                                        NULL,
1920                                        NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        /*119*/ NULL,
1924                                        NULL,
1925                                        NULL,
1926                                        NULL,
1927                                        NULL,
1928                                        /*124*/ NULL,
1929                                        NULL,
1930                                        NULL,
1931                                        NULL,
1932                                        NULL,
1933                                        /*129*/ NULL,
1934                                        NULL,
1935                                        NULL,
1936                                        NULL,
1937                                        NULL,
1938                                        /*134*/ NULL,
1939                                        NULL,
1940                                        NULL,
1941                                        NULL,
1942                                        NULL,
1943                                        /*139*/ MatSetBlockSizes_Default,
1944                                        NULL,
1945                                        NULL,
1946                                        NULL,
1947                                        NULL,
1948                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1949                                        NULL,
1950                                        NULL,
1951                                        NULL,
1952                                        NULL,
1953                                        NULL,
1954                                        /*150*/ NULL,
1955                                        MatEliminateZeros_MPISBAIJ,
1956                                        NULL};
1957 
1958 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1959 {
1960   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1961   PetscInt      i, mbs, Mbs;
1962   PetscMPIInt   size;
1963 
1964   PetscFunctionBegin;
1965   if (B->hash_active) {
1966     B->ops[0]      = b->cops;
1967     B->hash_active = PETSC_FALSE;
1968   }
1969   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1970   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1971   PetscCall(PetscLayoutSetUp(B->rmap));
1972   PetscCall(PetscLayoutSetUp(B->cmap));
1973   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1974   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1975   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1976 
1977   mbs = B->rmap->n / bs;
1978   Mbs = B->rmap->N / bs;
1979   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1980 
1981   B->rmap->bs = bs;
1982   b->bs2      = bs * bs;
1983   b->mbs      = mbs;
1984   b->Mbs      = Mbs;
1985   b->nbs      = B->cmap->n / bs;
1986   b->Nbs      = B->cmap->N / bs;
1987 
1988   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1989   b->rstartbs = B->rmap->rstart / bs;
1990   b->rendbs   = B->rmap->rend / bs;
1991 
1992   b->cstartbs = B->cmap->rstart / bs;
1993   b->cendbs   = B->cmap->rend / bs;
1994 
1995 #if defined(PETSC_USE_CTABLE)
1996   PetscCall(PetscHMapIDestroy(&b->colmap));
1997 #else
1998   PetscCall(PetscFree(b->colmap));
1999 #endif
2000   PetscCall(PetscFree(b->garray));
2001   PetscCall(VecDestroy(&b->lvec));
2002   PetscCall(VecScatterDestroy(&b->Mvctx));
2003   PetscCall(VecDestroy(&b->slvec0));
2004   PetscCall(VecDestroy(&b->slvec0b));
2005   PetscCall(VecDestroy(&b->slvec1));
2006   PetscCall(VecDestroy(&b->slvec1a));
2007   PetscCall(VecDestroy(&b->slvec1b));
2008   PetscCall(VecScatterDestroy(&b->sMvctx));
2009 
2010   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2011 
2012   MatSeqXAIJGetOptions_Private(b->B);
2013   PetscCall(MatDestroy(&b->B));
2014   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2015   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2016   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2017   MatSeqXAIJRestoreOptions_Private(b->B);
2018 
2019   MatSeqXAIJGetOptions_Private(b->A);
2020   PetscCall(MatDestroy(&b->A));
2021   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2022   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2023   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
2024   MatSeqXAIJRestoreOptions_Private(b->A);
2025 
2026   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2027   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2028 
2029   B->preallocated  = PETSC_TRUE;
2030   B->was_assembled = PETSC_FALSE;
2031   B->assembled     = PETSC_FALSE;
2032   PetscFunctionReturn(PETSC_SUCCESS);
2033 }
2034 
2035 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2036 {
2037   PetscInt        m, rstart, cend;
2038   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2039   const PetscInt *JJ          = NULL;
2040   PetscScalar    *values      = NULL;
2041   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2042   PetscBool       nooffprocentries;
2043 
2044   PetscFunctionBegin;
2045   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2046   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2047   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2048   PetscCall(PetscLayoutSetUp(B->rmap));
2049   PetscCall(PetscLayoutSetUp(B->cmap));
2050   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2051   m      = B->rmap->n / bs;
2052   rstart = B->rmap->rstart / bs;
2053   cend   = B->cmap->rend / bs;
2054 
2055   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2056   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2057   for (i = 0; i < m; i++) {
2058     nz = ii[i + 1] - ii[i];
2059     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2060     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2061     JJ = jj + ii[i];
2062     bd = 0;
2063     for (j = 0; j < nz; j++) {
2064       if (*JJ >= i + rstart) break;
2065       JJ++;
2066       bd++;
2067     }
2068     d = 0;
2069     for (; j < nz; j++) {
2070       if (*JJ++ >= cend) break;
2071       d++;
2072     }
2073     d_nnz[i] = d;
2074     o_nnz[i] = nz - d - bd;
2075     nz       = nz - bd;
2076     nz_max   = PetscMax(nz_max, nz);
2077   }
2078   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2079   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2080   PetscCall(PetscFree2(d_nnz, o_nnz));
2081 
2082   values = (PetscScalar *)V;
2083   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2084   for (i = 0; i < m; i++) {
2085     PetscInt        row   = i + rstart;
2086     PetscInt        ncols = ii[i + 1] - ii[i];
2087     const PetscInt *icols = jj + ii[i];
2088     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2089       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2090       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2091     } else { /* block ordering does not match so we can only insert one block at a time. */
2092       PetscInt j;
2093       for (j = 0; j < ncols; j++) {
2094         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2095         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2096       }
2097     }
2098   }
2099 
2100   if (!V) PetscCall(PetscFree(values));
2101   nooffprocentries    = B->nooffprocentries;
2102   B->nooffprocentries = PETSC_TRUE;
2103   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2104   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2105   B->nooffprocentries = nooffprocentries;
2106 
2107   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2108   PetscFunctionReturn(PETSC_SUCCESS);
2109 }
2110 
2111 /*MC
2112    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2113    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2114    the matrix is stored.
2115 
2116    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2117    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2118 
2119    Options Database Key:
2120 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2121 
2122    Level: beginner
2123 
2124    Note:
2125      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2126      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2127 
2128 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2129 M*/
2130 
2131 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2132 {
2133   Mat_MPISBAIJ *b;
2134   PetscBool     flg = PETSC_FALSE;
2135 
2136   PetscFunctionBegin;
2137   PetscCall(PetscNew(&b));
2138   B->data   = (void *)b;
2139   B->ops[0] = MatOps_Values;
2140 
2141   B->ops->destroy = MatDestroy_MPISBAIJ;
2142   B->ops->view    = MatView_MPISBAIJ;
2143   B->assembled    = PETSC_FALSE;
2144   B->insertmode   = NOT_SET_VALUES;
2145 
2146   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2147   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2148 
2149   /* build local table of row and column ownerships */
2150   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2151 
2152   /* build cache for off array entries formed */
2153   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2154 
2155   b->donotstash  = PETSC_FALSE;
2156   b->colmap      = NULL;
2157   b->garray      = NULL;
2158   b->roworiented = PETSC_TRUE;
2159 
2160   /* stuff used in block assembly */
2161   b->barray = NULL;
2162 
2163   /* stuff used for matrix vector multiply */
2164   b->lvec    = NULL;
2165   b->Mvctx   = NULL;
2166   b->slvec0  = NULL;
2167   b->slvec0b = NULL;
2168   b->slvec1  = NULL;
2169   b->slvec1a = NULL;
2170   b->slvec1b = NULL;
2171   b->sMvctx  = NULL;
2172 
2173   /* stuff for MatGetRow() */
2174   b->rowindices   = NULL;
2175   b->rowvalues    = NULL;
2176   b->getrowactive = PETSC_FALSE;
2177 
2178   /* hash table stuff */
2179   b->ht           = NULL;
2180   b->hd           = NULL;
2181   b->ht_size      = 0;
2182   b->ht_flag      = PETSC_FALSE;
2183   b->ht_fact      = 0;
2184   b->ht_total_ct  = 0;
2185   b->ht_insert_ct = 0;
2186 
2187   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2188   b->ijonly = PETSC_FALSE;
2189 
2190   b->in_loc = NULL;
2191   b->v_loc  = NULL;
2192   b->n_loc  = 0;
2193 
2194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2198 #if defined(PETSC_HAVE_ELEMENTAL)
2199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2200 #endif
2201 #if defined(PETSC_HAVE_SCALAPACK)
2202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2203 #endif
2204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2205   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2206 
2207   B->symmetric                   = PETSC_BOOL3_TRUE;
2208   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2209   B->symmetry_eternal            = PETSC_TRUE;
2210   B->structural_symmetry_eternal = PETSC_TRUE;
2211 #if defined(PETSC_USE_COMPLEX)
2212   B->hermitian = PETSC_BOOL3_FALSE;
2213 #else
2214   B->hermitian = PETSC_BOOL3_TRUE;
2215 #endif
2216 
2217   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2218   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2219   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2220   if (flg) {
2221     PetscReal fact = 1.39;
2222     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2223     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2224     if (fact <= 1.0) fact = 1.39;
2225     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2226     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2227   }
2228   PetscOptionsEnd();
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2233 /*MC
2234    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2235 
2236    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2237    and `MATMPISBAIJ` otherwise.
2238 
2239    Options Database Key:
2240 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2241 
2242   Level: beginner
2243 
2244 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2245 M*/
2246 
2247 /*@C
2248   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2249   the user should preallocate the matrix storage by setting the parameters
2250   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2251   performance can be increased by more than a factor of 50.
2252 
2253   Collective
2254 
2255   Input Parameters:
2256 + B     - the matrix
2257 . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2258           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2259 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2260           submatrix  (same for all local rows)
2261 . d_nnz - array containing the number of block nonzeros in the various block rows
2262           in the upper triangular and diagonal part of the in diagonal portion of the local
2263           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2264           for the diagonal entry and set a value even if it is zero.
2265 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2266           submatrix (same for all local rows).
2267 - o_nnz - array containing the number of nonzeros in the various block rows of the
2268           off-diagonal portion of the local submatrix that is right of the diagonal
2269           (possibly different for each block row) or `NULL`.
2270 
2271   Options Database Keys:
2272 + -mat_no_unroll  - uses code that does not unroll the loops in the
2273                     block calculations (much slower)
2274 - -mat_block_size - size of the blocks to use
2275 
2276   Level: intermediate
2277 
2278   Notes:
2279 
2280   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2281   than it must be used on all processors that share the object for that argument.
2282 
2283   If the *_nnz parameter is given then the *_nz parameter is ignored
2284 
2285   Storage Information:
2286   For a square global matrix we define each processor's diagonal portion
2287   to be its local rows and the corresponding columns (a square submatrix);
2288   each processor's off-diagonal portion encompasses the remainder of the
2289   local matrix (a rectangular submatrix).
2290 
2291   The user can specify preallocated storage for the diagonal part of
2292   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2293   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2294   memory allocation.  Likewise, specify preallocated storage for the
2295   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2296 
2297   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2298   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2299   You can also run with the option `-info` and look for messages with the string
2300   malloc in them to see if additional memory allocation was needed.
2301 
2302   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2303   the figure below we depict these three local rows and all columns (0-11).
2304 
2305 .vb
2306            0 1 2 3 4 5 6 7 8 9 10 11
2307           --------------------------
2308    row 3  |. . . d d d o o o o  o  o
2309    row 4  |. . . d d d o o o o  o  o
2310    row 5  |. . . d d d o o o o  o  o
2311           --------------------------
2312 .ve
2313 
2314   Thus, any entries in the d locations are stored in the d (diagonal)
2315   submatrix, and any entries in the o locations are stored in the
2316   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2317   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2318 
2319   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2320   plus the diagonal part of the d matrix,
2321   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2322 
2323   In general, for PDE problems in which most nonzeros are near the diagonal,
2324   one expects `d_nz` >> `o_nz`.
2325 
2326 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2327 @*/
2328 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2329 {
2330   PetscFunctionBegin;
2331   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2332   PetscValidType(B, 1);
2333   PetscValidLogicalCollectiveInt(B, bs, 2);
2334   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2335   PetscFunctionReturn(PETSC_SUCCESS);
2336 }
2337 
2338 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2339 /*@C
2340   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2341   (block compressed row).  For good matrix assembly performance
2342   the user should preallocate the matrix storage by setting the parameters
2343   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2344 
2345   Collective
2346 
2347   Input Parameters:
2348 + comm  - MPI communicator
2349 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2350           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2351 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2352           This value should be the same as the local size used in creating the
2353           y vector for the matrix-vector product y = Ax.
2354 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2355           This value should be the same as the local size used in creating the
2356           x vector for the matrix-vector product y = Ax.
2357 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2358 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2359 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2360           submatrix (same for all local rows)
2361 . d_nnz - array containing the number of block nonzeros in the various block rows
2362           in the upper triangular portion of the in diagonal portion of the local
2363           (possibly different for each block block row) or `NULL`.
2364           If you plan to factor the matrix you must leave room for the diagonal entry and
2365           set its value even if it is zero.
2366 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2367           submatrix (same for all local rows).
2368 - o_nnz - array containing the number of nonzeros in the various block rows of the
2369           off-diagonal portion of the local submatrix (possibly different for
2370           each block row) or `NULL`.
2371 
2372   Output Parameter:
2373 . A - the matrix
2374 
2375   Options Database Keys:
2376 + -mat_no_unroll  - uses code that does not unroll the loops in the
2377                     block calculations (much slower)
2378 . -mat_block_size - size of the blocks to use
2379 - -mat_mpi        - use the parallel matrix data structures even on one processor
2380                     (defaults to using SeqBAIJ format on one processor)
2381 
2382   Level: intermediate
2383 
2384   Notes:
2385   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2386   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2387   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2388 
2389   The number of rows and columns must be divisible by blocksize.
2390   This matrix type does not support complex Hermitian operation.
2391 
2392   The user MUST specify either the local or global matrix dimensions
2393   (possibly both).
2394 
2395   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2396   than it must be used on all processors that share the object for that argument.
2397 
2398   If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by
2399   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2400 
2401   If the *_nnz parameter is given then the *_nz parameter is ignored
2402 
2403   Storage Information:
2404   For a square global matrix we define each processor's diagonal portion
2405   to be its local rows and the corresponding columns (a square submatrix);
2406   each processor's off-diagonal portion encompasses the remainder of the
2407   local matrix (a rectangular submatrix).
2408 
2409   The user can specify preallocated storage for the diagonal part of
2410   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2411   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2412   memory allocation. Likewise, specify preallocated storage for the
2413   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2414 
2415   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2416   the figure below we depict these three local rows and all columns (0-11).
2417 
2418 .vb
2419            0 1 2 3 4 5 6 7 8 9 10 11
2420           --------------------------
2421    row 3  |. . . d d d o o o o  o  o
2422    row 4  |. . . d d d o o o o  o  o
2423    row 5  |. . . d d d o o o o  o  o
2424           --------------------------
2425 .ve
2426 
2427   Thus, any entries in the d locations are stored in the d (diagonal)
2428   submatrix, and any entries in the o locations are stored in the
2429   o (off-diagonal) submatrix. Note that the d matrix is stored in
2430   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2431 
2432   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2433   plus the diagonal part of the d matrix,
2434   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2435   In general, for PDE problems in which most nonzeros are near the diagonal,
2436   one expects `d_nz` >> `o_nz`.
2437 
2438 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2439           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2440 @*/
2441 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2442 {
2443   PetscMPIInt size;
2444 
2445   PetscFunctionBegin;
2446   PetscCall(MatCreate(comm, A));
2447   PetscCall(MatSetSizes(*A, m, n, M, N));
2448   PetscCallMPI(MPI_Comm_size(comm, &size));
2449   if (size > 1) {
2450     PetscCall(MatSetType(*A, MATMPISBAIJ));
2451     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2452   } else {
2453     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2454     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2455   }
2456   PetscFunctionReturn(PETSC_SUCCESS);
2457 }
2458 
2459 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2460 {
2461   Mat           mat;
2462   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2463   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2464   PetscScalar  *array;
2465 
2466   PetscFunctionBegin;
2467   *newmat = NULL;
2468 
2469   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2470   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2471   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2472   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2473   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2474 
2475   if (matin->hash_active) {
2476     PetscCall(MatSetUp(mat));
2477   } else {
2478     mat->factortype   = matin->factortype;
2479     mat->preallocated = PETSC_TRUE;
2480     mat->assembled    = PETSC_TRUE;
2481     mat->insertmode   = NOT_SET_VALUES;
2482 
2483     a      = (Mat_MPISBAIJ *)mat->data;
2484     a->bs2 = oldmat->bs2;
2485     a->mbs = oldmat->mbs;
2486     a->nbs = oldmat->nbs;
2487     a->Mbs = oldmat->Mbs;
2488     a->Nbs = oldmat->Nbs;
2489 
2490     a->size         = oldmat->size;
2491     a->rank         = oldmat->rank;
2492     a->donotstash   = oldmat->donotstash;
2493     a->roworiented  = oldmat->roworiented;
2494     a->rowindices   = NULL;
2495     a->rowvalues    = NULL;
2496     a->getrowactive = PETSC_FALSE;
2497     a->barray       = NULL;
2498     a->rstartbs     = oldmat->rstartbs;
2499     a->rendbs       = oldmat->rendbs;
2500     a->cstartbs     = oldmat->cstartbs;
2501     a->cendbs       = oldmat->cendbs;
2502 
2503     /* hash table stuff */
2504     a->ht           = NULL;
2505     a->hd           = NULL;
2506     a->ht_size      = 0;
2507     a->ht_flag      = oldmat->ht_flag;
2508     a->ht_fact      = oldmat->ht_fact;
2509     a->ht_total_ct  = 0;
2510     a->ht_insert_ct = 0;
2511 
2512     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2513     if (oldmat->colmap) {
2514 #if defined(PETSC_USE_CTABLE)
2515       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2516 #else
2517       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2518       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2519 #endif
2520     } else a->colmap = NULL;
2521 
2522     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2523       PetscCall(PetscMalloc1(len, &a->garray));
2524       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2525     } else a->garray = NULL;
2526 
2527     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2528     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2529     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2530 
2531     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2532     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2533 
2534     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2535     PetscCall(VecGetArray(a->slvec1, &array));
2536     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2537     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2538     PetscCall(VecRestoreArray(a->slvec1, &array));
2539     PetscCall(VecGetArray(a->slvec0, &array));
2540     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2541     PetscCall(VecRestoreArray(a->slvec0, &array));
2542 
2543     /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2544     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2545     a->sMvctx = oldmat->sMvctx;
2546 
2547     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2548     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2549   }
2550   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2551   *newmat = mat;
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2556 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2557 
2558 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2559 {
2560   PetscBool isbinary;
2561 
2562   PetscFunctionBegin;
2563   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2564   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2565   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2566   PetscFunctionReturn(PETSC_SUCCESS);
2567 }
2568 
2569 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2570 {
2571   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2572   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2573   PetscReal     atmp;
2574   PetscReal    *work, *svalues, *rvalues;
2575   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2576   PetscMPIInt   rank, size;
2577   PetscInt     *rowners_bs, dest, count, source;
2578   PetscScalar  *va;
2579   MatScalar    *ba;
2580   MPI_Status    stat;
2581 
2582   PetscFunctionBegin;
2583   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2584   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2585   PetscCall(VecGetArray(v, &va));
2586 
2587   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2588   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2589 
2590   bs  = A->rmap->bs;
2591   mbs = a->mbs;
2592   Mbs = a->Mbs;
2593   ba  = b->a;
2594   bi  = b->i;
2595   bj  = b->j;
2596 
2597   /* find ownerships */
2598   rowners_bs = A->rmap->range;
2599 
2600   /* each proc creates an array to be distributed */
2601   PetscCall(PetscCalloc1(bs * Mbs, &work));
2602 
2603   /* row_max for B */
2604   if (rank != size - 1) {
2605     for (i = 0; i < mbs; i++) {
2606       ncols = bi[1] - bi[0];
2607       bi++;
2608       brow = bs * i;
2609       for (j = 0; j < ncols; j++) {
2610         bcol = bs * (*bj);
2611         for (kcol = 0; kcol < bs; kcol++) {
2612           col = bcol + kcol;           /* local col index */
2613           col += rowners_bs[rank + 1]; /* global col index */
2614           for (krow = 0; krow < bs; krow++) {
2615             atmp = PetscAbsScalar(*ba);
2616             ba++;
2617             row = brow + krow; /* local row index */
2618             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2619             if (work[col] < atmp) work[col] = atmp;
2620           }
2621         }
2622         bj++;
2623       }
2624     }
2625 
2626     /* send values to its owners */
2627     for (dest = rank + 1; dest < size; dest++) {
2628       svalues = work + rowners_bs[dest];
2629       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2630       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2631     }
2632   }
2633 
2634   /* receive values */
2635   if (rank) {
2636     rvalues = work;
2637     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2638     for (source = 0; source < rank; source++) {
2639       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2640       /* process values */
2641       for (i = 0; i < count; i++) {
2642         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2643       }
2644     }
2645   }
2646 
2647   PetscCall(VecRestoreArray(v, &va));
2648   PetscCall(PetscFree(work));
2649   PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651 
2652 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2653 {
2654   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2655   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2656   PetscScalar       *x, *ptr, *from;
2657   Vec                bb1;
2658   const PetscScalar *b;
2659 
2660   PetscFunctionBegin;
2661   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2662   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2663 
2664   if (flag == SOR_APPLY_UPPER) {
2665     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2666     PetscFunctionReturn(PETSC_SUCCESS);
2667   }
2668 
2669   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2670     if (flag & SOR_ZERO_INITIAL_GUESS) {
2671       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2672       its--;
2673     }
2674 
2675     PetscCall(VecDuplicate(bb, &bb1));
2676     while (its--) {
2677       /* lower triangular part: slvec0b = - B^T*xx */
2678       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2679 
2680       /* copy xx into slvec0a */
2681       PetscCall(VecGetArray(mat->slvec0, &ptr));
2682       PetscCall(VecGetArray(xx, &x));
2683       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2684       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2685 
2686       PetscCall(VecScale(mat->slvec0, -1.0));
2687 
2688       /* copy bb into slvec1a */
2689       PetscCall(VecGetArray(mat->slvec1, &ptr));
2690       PetscCall(VecGetArrayRead(bb, &b));
2691       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2692       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2693 
2694       /* set slvec1b = 0 */
2695       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2696       PetscCall(VecZeroEntries(mat->slvec1b));
2697 
2698       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2699       PetscCall(VecRestoreArray(xx, &x));
2700       PetscCall(VecRestoreArrayRead(bb, &b));
2701       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2702 
2703       /* upper triangular part: bb1 = bb1 - B*x */
2704       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2705 
2706       /* local diagonal sweep */
2707       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2708     }
2709     PetscCall(VecDestroy(&bb1));
2710   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2711     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2712   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2713     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2714   } else if (flag & SOR_EISENSTAT) {
2715     Vec                xx1;
2716     PetscBool          hasop;
2717     const PetscScalar *diag;
2718     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2719     PetscInt           i, n;
2720 
2721     if (!mat->xx1) {
2722       PetscCall(VecDuplicate(bb, &mat->xx1));
2723       PetscCall(VecDuplicate(bb, &mat->bb1));
2724     }
2725     xx1 = mat->xx1;
2726     bb1 = mat->bb1;
2727 
2728     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2729 
2730     if (!mat->diag) {
2731       /* this is wrong for same matrix with new nonzero values */
2732       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2733       PetscCall(MatGetDiagonal(matin, mat->diag));
2734     }
2735     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2736 
2737     if (hasop) {
2738       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2739       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2740     } else {
2741       /*
2742           These two lines are replaced by code that may be a bit faster for a good compiler
2743       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2744       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2745       */
2746       PetscCall(VecGetArray(mat->slvec1a, &sl));
2747       PetscCall(VecGetArrayRead(mat->diag, &diag));
2748       PetscCall(VecGetArrayRead(bb, &b));
2749       PetscCall(VecGetArray(xx, &x));
2750       PetscCall(VecGetLocalSize(xx, &n));
2751       if (omega == 1.0) {
2752         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2753         PetscCall(PetscLogFlops(2.0 * n));
2754       } else {
2755         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2756         PetscCall(PetscLogFlops(3.0 * n));
2757       }
2758       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2759       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2760       PetscCall(VecRestoreArrayRead(bb, &b));
2761       PetscCall(VecRestoreArray(xx, &x));
2762     }
2763 
2764     /* multiply off-diagonal portion of matrix */
2765     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2766     PetscCall(VecZeroEntries(mat->slvec1b));
2767     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2768     PetscCall(VecGetArray(mat->slvec0, &from));
2769     PetscCall(VecGetArray(xx, &x));
2770     PetscCall(PetscArraycpy(from, x, bs * mbs));
2771     PetscCall(VecRestoreArray(mat->slvec0, &from));
2772     PetscCall(VecRestoreArray(xx, &x));
2773     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2774     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2775     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2776 
2777     /* local sweep */
2778     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2779     PetscCall(VecAXPY(xx, 1.0, xx1));
2780   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2781   PetscFunctionReturn(PETSC_SUCCESS);
2782 }
2783 
2784 /*@
2785   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows.
2786 
2787   Collective
2788 
2789   Input Parameters:
2790 + comm - MPI communicator
2791 . bs   - the block size, only a block size of 1 is supported
2792 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2793 . n    - This value should be the same as the local size used in creating the
2794          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
2795          calculated if `N` is given) For square matrices `n` is almost always `m`.
2796 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2797 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2798 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2799 . j    - column indices
2800 - a    - matrix values
2801 
2802   Output Parameter:
2803 . mat - the matrix
2804 
2805   Level: intermediate
2806 
2807   Notes:
2808   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2809   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2810   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2811 
2812   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2813 
2814 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2815           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2816 @*/
2817 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2818 {
2819   PetscFunctionBegin;
2820   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2821   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2822   PetscCall(MatCreate(comm, mat));
2823   PetscCall(MatSetSizes(*mat, m, n, M, N));
2824   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2825   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2826   PetscFunctionReturn(PETSC_SUCCESS);
2827 }
2828 
2829 /*@C
2830   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2831 
2832   Collective
2833 
2834   Input Parameters:
2835 + B  - the matrix
2836 . bs - the block size
2837 . i  - the indices into `j` for the start of each local row (indices start with zero)
2838 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2839 - v  - optional values in the matrix, pass `NULL` if not provided
2840 
2841   Level: advanced
2842 
2843   Notes:
2844   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2845   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2846   called this routine.
2847 
2848   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2849   and usually the numerical values as well
2850 
2851   Any entries passed in that are below the diagonal are ignored
2852 
2853 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2854           `MatCreateMPISBAIJWithArrays()`
2855 @*/
2856 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2857 {
2858   PetscFunctionBegin;
2859   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2860   PetscFunctionReturn(PETSC_SUCCESS);
2861 }
2862 
2863 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2864 {
2865   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2866   PetscInt    *indx;
2867   PetscScalar *values;
2868 
2869   PetscFunctionBegin;
2870   PetscCall(MatGetSize(inmat, &m, &N));
2871   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2872     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2873     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2874     PetscInt     *bindx, rmax = a->rmax, j;
2875     PetscMPIInt   rank, size;
2876 
2877     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2878     mbs = m / bs;
2879     Nbs = N / cbs;
2880     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2881     nbs = n / cbs;
2882 
2883     PetscCall(PetscMalloc1(rmax, &bindx));
2884     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2885 
2886     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2887     PetscCallMPI(MPI_Comm_rank(comm, &size));
2888     if (rank == size - 1) {
2889       /* Check sum(nbs) = Nbs */
2890       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2891     }
2892 
2893     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2894     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2895     for (i = 0; i < mbs; i++) {
2896       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2897       nnz = nnz / bs;
2898       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2899       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2900       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2901     }
2902     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2903     PetscCall(PetscFree(bindx));
2904 
2905     PetscCall(MatCreate(comm, outmat));
2906     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2907     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2908     PetscCall(MatSetType(*outmat, MATSBAIJ));
2909     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2910     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2911     MatPreallocateEnd(dnz, onz);
2912   }
2913 
2914   /* numeric phase */
2915   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2916   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2917 
2918   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2919   for (i = 0; i < m; i++) {
2920     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2921     Ii = i + rstart;
2922     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2923     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2924   }
2925   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2926   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2927   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2928   PetscFunctionReturn(PETSC_SUCCESS);
2929 }
2930