/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 258f4e960de9459edc774f7eab4bc0a4819f7230) */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ
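/* Note: mpihashmat.h consumes the TYPE token above to paste together the type-specific names
   of the generated routines */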

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
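/*
   Rough outline (see MATPREALLOCATOR): the preallocator matrix records the nonzero pattern of
   every MatSetValues() call without storing any values, so replaying each local row of the X[]
   matrices (plus the transposed entries when symm[] is set) lets MatPreallocatorPreallocate()
   size B exactly before the real assembly happens.
*/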
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

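/*
   Converts from symmetric (upper triangle only) storage to a full-storage type: each locally
   owned row r contributes its stored entries (r,c) and, to rebuild the lower triangle, the
   mirrored entries (c,r) -- conjugated in complex builds when the matrix is marked Hermitian.
*/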
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

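/*
   The two macros below insert a single scalar into a block row: a short bisection narrows the
   search window to at most three block columns, a linear scan finishes the lookup, and on a
   miss MatSeqXAIJReallocateAIJ() grows the row so the later entries can be shifted up to make
   room for the new block.
*/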
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j that the user passes in is either ignored or generates an error
*/
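/*
   A sketch of the caller-visible behavior (not from the test suite), with bs = 1:

     PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
     PetscCall(MatSetValue(mat, 2, 5, 1.0, INSERT_VALUES));   <-- stored, since 2 <= 5
     PetscCall(MatSetValue(mat, 5, 2, 1.0, INSERT_VALUES));   <-- silently dropped

   Without the option, the second call makes this routine raise PETSC_ERR_USER.
*/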
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

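/*
   Inserts one dense bs x bs block at block position (row,col) of the upper triangle. Blocks are
   stored column-major inside their bs2 slab, so row-oriented input is transposed on the fly by
   walking bap with stride bs, while column-oriented input is copied contiguously.
*/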
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
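/*
   barray buffers one bs x bs block in the column-major layout the inlined routines above
   expect; stepval is the scalar distance from the end of one block row (or column) of v to the
   start of the next, so the gather loop below can lift a single block out of the strided input.
*/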
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
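      /* ||mat||_F^2 = sum over ranks of ||A||_F^2 + 2 ||B||_F^2: each B (off-diagonal) block is
         stored on one rank only but also stands for its transpose, hence the factor of 2 below */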
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square of norm(B) */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;
      PetscMPIInt   iN;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(rsum, rsum2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. The stashed values are column-oriented, so temporarily
       clear the row-oriented flags and restore the original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

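/*
   Computes y = A x + B x_ghost + (B^T x contributions scattered back): slvec0 packs
   [x_local ; B^T x_local], the ADD_VALUES scatter accumulates the transpose contributions into
   slvec1a while filling slvec1b with the ghost values of x, and the final multadd applies B.
*/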
static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
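      /* imark ends up as the number of B columns whose global index falls before this rank's
         diagonal block; the merged row is then those B columns, all of A, the remaining B columns */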
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

1328 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1329    Input: isrow       - distributed (parallel),
1330           iscol_local - locally owned (seq)
1331 */
1332 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1333 {
1334   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1335   const PetscInt *ptr1, *ptr2;
1336 
1337   PetscFunctionBegin;
1338   *flg = PETSC_FALSE;
1339   PetscCall(ISGetLocalSize(isrow, &sz1));
1340   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1341   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1342 
1343   PetscCall(ISGetIndices(isrow, &ptr1));
1344   PetscCall(ISGetIndices(iscol_local, &ptr2));
1345 
1346   PetscCall(PetscMalloc1(sz1, &a1));
1347   PetscCall(PetscMalloc1(sz2, &a2));
1348   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1349   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1350   PetscCall(PetscSortInt(sz1, a1));
1351   PetscCall(PetscSortInt(sz2, a2));
1352 
1353   nmatch = 0;
1354   k      = 0;
1355   for (i = 0; i < sz1; i++) {
1356     for (j = k; j < sz2; j++) {
1357       if (a1[i] == a2[j]) {
1358         k = j;
1359         nmatch++;
1360         break;
1361       }
1362     }
1363   }
1364   PetscCall(ISRestoreIndices(isrow, &ptr1));
1365   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1366   PetscCall(PetscFree(a1));
1367   PetscCall(PetscFree(a2));
1368   if (nmatch < sz1) {
1369     *flg = PETSC_FALSE;
1370   } else {
1371     *flg = PETSC_TRUE;
1372   }
1373   PetscFunctionReturn(PETSC_SUCCESS);
1374 }
1375 
1376 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1377 {
1378   Mat       C[2];
1379   IS        iscol_local, isrow_local;
1380   PetscInt  csize, csize_local, rsize;
1381   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1382 
1383   PetscFunctionBegin;
1384   PetscCall(ISGetLocalSize(iscol, &csize));
1385   PetscCall(ISGetLocalSize(isrow, &rsize));
1386   if (call == MAT_REUSE_MATRIX) {
1387     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1388     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1389   } else {
1390     PetscCall(ISAllGather(iscol, &iscol_local));
1391     PetscCall(ISSorted(iscol_local, &issorted));
1392     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1393   }
1394   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1395   if (!isequal) {
1396     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1397     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1398     if (!isidentity) {
1399       if (call == MAT_REUSE_MATRIX) {
1400         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1401         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1402       } else {
1403         PetscCall(ISAllGather(isrow, &isrow_local));
1404         PetscCall(ISSorted(isrow_local, &issorted));
1405         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1406       }
1407     }
1408   }
1409   /* now extract the requested block via MatCreateSubMatrix_MPIBAIJ_Private() */
1410   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
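1410   /* For the general case the result is assembled as mat(isrow,iscol) + mat(iscol,isrow)^T, since only
1410      half of the symmetric matrix is stored; this requires isrow and iscol to have an empty
1410      intersection, which is checked below for MAT_INITIAL_MATRIX */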
1411   if (!isequal && !isidentity) {
1412     if (call == MAT_INITIAL_MATRIX) {
1413       IS       intersect;
1414       PetscInt ni;
1415 
1416       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1417       PetscCall(ISGetLocalSize(intersect, &ni));
1418       PetscCall(ISDestroy(&intersect));
1419       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1420     }
1421     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1422     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1423     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1424     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1425     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1426     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1427     PetscCall(MatDestroy(C));
1428     PetscCall(MatDestroy(C + 1));
1429   }
1430   if (call == MAT_INITIAL_MATRIX) {
1431     if (!isequal && !isidentity) {
1432       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1433       PetscCall(ISDestroy(&isrow_local));
1434     }
1435     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1436     PetscCall(ISDestroy(&iscol_local));
1437   }
1438   PetscFunctionReturn(PETSC_SUCCESS);
1439 }
1440 
1441 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1442 {
1443   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1444 
1445   PetscFunctionBegin;
1446   PetscCall(MatZeroEntries(l->A));
1447   PetscCall(MatZeroEntries(l->B));
1448   PetscFunctionReturn(PETSC_SUCCESS);
1449 }
1450 
1451 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1452 {
1453   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1454   Mat            A = a->A, B = a->B;
1455   PetscLogDouble isend[5], irecv[5];
1456 
1457   PetscFunctionBegin;
1458   info->block_size = (PetscReal)matin->rmap->bs;
1459 
1460   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1461 
1462   isend[0] = info->nz_used;
1463   isend[1] = info->nz_allocated;
1464   isend[2] = info->nz_unneeded;
1465   isend[3] = info->memory;
1466   isend[4] = info->mallocs;
1467 
1468   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1469 
1470   isend[0] += info->nz_used;
1471   isend[1] += info->nz_allocated;
1472   isend[2] += info->nz_unneeded;
1473   isend[3] += info->memory;
1474   isend[4] += info->mallocs;
1475   if (flag == MAT_LOCAL) {
1476     info->nz_used      = isend[0];
1477     info->nz_allocated = isend[1];
1478     info->nz_unneeded  = isend[2];
1479     info->memory       = isend[3];
1480     info->mallocs      = isend[4];
1481   } else if (flag == MAT_GLOBAL_MAX) {
1482     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1483 
1484     info->nz_used      = irecv[0];
1485     info->nz_allocated = irecv[1];
1486     info->nz_unneeded  = irecv[2];
1487     info->memory       = irecv[3];
1488     info->mallocs      = irecv[4];
1489   } else if (flag == MAT_GLOBAL_SUM) {
1490     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1491 
1492     info->nz_used      = irecv[0];
1493     info->nz_allocated = irecv[1];
1494     info->nz_unneeded  = irecv[2];
1495     info->memory       = irecv[3];
1496     info->mallocs      = irecv[4];
1497   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1498   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1499   info->fill_ratio_needed = 0;
1500   info->factor_mallocs    = 0;
1501   PetscFunctionReturn(PETSC_SUCCESS);
1502 }
1503 
1504 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1505 {
1506   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1507   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1508 
1509   PetscFunctionBegin;
1510   switch (op) {
1511   case MAT_NEW_NONZERO_LOCATIONS:
1512   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1513   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1514   case MAT_KEEP_NONZERO_PATTERN:
1515   case MAT_NEW_NONZERO_LOCATION_ERR:
1516     MatCheckPreallocated(A, 1);
1517     PetscCall(MatSetOption(a->A, op, flg));
1518     PetscCall(MatSetOption(a->B, op, flg));
1519     break;
1520   case MAT_ROW_ORIENTED:
1521     MatCheckPreallocated(A, 1);
1522     a->roworiented = flg;
1523 
1524     PetscCall(MatSetOption(a->A, op, flg));
1525     PetscCall(MatSetOption(a->B, op, flg));
1526     break;
1527   case MAT_FORCE_DIAGONAL_ENTRIES:
1528   case MAT_SORTED_FULL:
1529   case MAT_SUBMAT_SINGLEIS:
1530     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1531     break;
1532   case MAT_IGNORE_OFF_PROC_ENTRIES:
1533     a->donotstash = flg;
1534     break;
1535   case MAT_USE_HASH_TABLE:
1536     a->ht_flag = flg;
1537     break;
1538   case MAT_HERMITIAN:
1539     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1540 #if defined(PETSC_USE_COMPLEX)
1541     if (flg) { /* need different mat-vec ops */
1542       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1543       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1544       A->ops->multtranspose    = NULL;
1545       A->ops->multtransposeadd = NULL;
1546       A->symmetric             = PETSC_BOOL3_FALSE;
1547     }
1548 #endif
1549     break;
1550   case MAT_SPD:
1551   case MAT_SYMMETRIC:
1552     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1553 #if defined(PETSC_USE_COMPLEX)
1554     if (flg) { /* restore to use default mat-vec ops */
1555       A->ops->mult             = MatMult_MPISBAIJ;
1556       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1557       A->ops->multtranspose    = MatMult_MPISBAIJ;
1558       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1559     }
1560 #endif
1561     break;
1562   case MAT_STRUCTURALLY_SYMMETRIC:
1563     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1564     break;
1565   case MAT_SYMMETRY_ETERNAL:
1566   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1567     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1568     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1569     break;
1570   case MAT_SPD_ETERNAL:
1571     break;
1572   case MAT_IGNORE_LOWER_TRIANGULAR:
1573   case MAT_ERROR_LOWER_TRIANGULAR:
1574     aA->ignore_ltriangular = flg;
1575     break;
1576   case MAT_GETROW_UPPERTRIANGULAR:
1577     aA->getrow_utriangular = flg;
1578     break;
1579   default:
1580     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unknown option %d", (int)op);
1581   }
1582   PetscFunctionReturn(PETSC_SUCCESS);
1583 }
1584 
1585 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1586 {
1587   PetscFunctionBegin;
1588   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1589   if (reuse == MAT_INITIAL_MATRIX) {
1590     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1591   } else if (reuse == MAT_REUSE_MATRIX) {
1592     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1593   }
1594   PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596 
1597 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1598 {
1599   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1600   Mat           a = baij->A, b = baij->B;
1601   PetscInt      nv, m, n;
1602   PetscBool     flg;
1603 
1604   PetscFunctionBegin;
1605   if (ll != rr) {
1606     PetscCall(VecEqual(ll, rr, &flg));
1607     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be the same");
1608   }
1609   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1610 
1611   PetscCall(MatGetLocalSize(mat, &m, &n));
1612   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1613 
1614   PetscCall(VecGetLocalSize(rr, &nv));
1615   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1616 
1617   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1618 
1619   /* left diagonalscale the off-diagonal part */
1620   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1621 
1622   /* scale the diagonal part */
1623   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1624 
1625   /* right diagonalscale the off-diagonal part */
1626   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1627   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1628   PetscFunctionReturn(PETSC_SUCCESS);
1629 }
1630 
1631 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1632 {
1633   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1634 
1635   PetscFunctionBegin;
1636   PetscCall(MatSetUnfactored(a->A));
1637   PetscFunctionReturn(PETSC_SUCCESS);
1638 }
1639 
1640 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1641 
1642 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1643 {
1644   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1645   Mat           a, b, c, d;
1646   PetscBool     flg;
1647 
1648   PetscFunctionBegin;
1649   a = matA->A;
1650   b = matA->B;
1651   c = matB->A;
1652   d = matB->B;
1653 
1654   PetscCall(MatEqual(a, c, &flg));
1655   if (flg) PetscCall(MatEqual(b, d, &flg));
1656   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1657   PetscFunctionReturn(PETSC_SUCCESS);
1658 }
1659 
1660 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1661 {
1662   PetscBool isbaij;
1663 
1664   PetscFunctionBegin;
1665   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1666   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1667   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1668   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1669     PetscCall(MatGetRowUpperTriangular(A));
1670     PetscCall(MatCopy_Basic(A, B, str));
1671     PetscCall(MatRestoreRowUpperTriangular(A));
1672   } else {
1673     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1674     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1675 
1676     PetscCall(MatCopy(a->A, b->A, str));
1677     PetscCall(MatCopy(a->B, b->B, str));
1678   }
1679   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1680   PetscFunctionReturn(PETSC_SUCCESS);
1681 }
1682 
1683 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1684 {
1685   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1686   PetscBLASInt  bnz, one                          = 1;
1687   Mat_SeqSBAIJ *xa, *ya;
1688   Mat_SeqBAIJ  *xb, *yb;
1689 
1690   PetscFunctionBegin;
1691   if (str == SAME_NONZERO_PATTERN) {
1692     PetscScalar alpha = a;
1693     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1694     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1695     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1696     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1697     xb = (Mat_SeqBAIJ *)xx->B->data;
1698     yb = (Mat_SeqBAIJ *)yy->B->data;
1699     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1700     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1701     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1702   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1703     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1704     PetscCall(MatAXPY_Basic(Y, a, X, str));
1705     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1706   } else {
1707     Mat       B;
1708     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1709     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1710     PetscCall(MatGetRowUpperTriangular(X));
1711     PetscCall(MatGetRowUpperTriangular(Y));
1712     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1713     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1714     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1715     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1716     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1717     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1718     PetscCall(MatSetType(B, MATMPISBAIJ));
1719     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1720     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1721     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1722     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1723     PetscCall(MatHeaderMerge(Y, &B));
1724     PetscCall(PetscFree(nnz_d));
1725     PetscCall(PetscFree(nnz_o));
1726     PetscCall(MatRestoreRowUpperTriangular(X));
1727     PetscCall(MatRestoreRowUpperTriangular(Y));
1728   }
1729   PetscFunctionReturn(PETSC_SUCCESS);
1730 }
1731 
1732 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1733 {
1734   PetscInt  i;
1735   PetscBool flg;
1736 
1737   PetscFunctionBegin;
1738   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1739   for (i = 0; i < n; i++) {
1740     PetscCall(ISEqual(irow[i], icol[i], &flg));
1741     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1742   }
1743   PetscFunctionReturn(PETSC_SUCCESS);
1744 }
1745 
1746 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1747 {
1748   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1749   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1750 
1751   PetscFunctionBegin;
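1751   /* MatShift_Basic() inserts along the diagonal, so make sure the diagonal blocks are preallocated first */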
1752   if (!Y->preallocated) {
1753     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1754   } else if (!aij->nz) {
1755     PetscInt nonew = aij->nonew;
1756     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1757     aij->nonew = nonew;
1758   }
1759   PetscCall(MatShift_Basic(Y, a));
1760   PetscFunctionReturn(PETSC_SUCCESS);
1761 }
1762 
1763 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1764 {
1765   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1766 
1767   PetscFunctionBegin;
1768   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1769   PetscCall(MatMissingDiagonal(a->A, missing, d));
1770   if (d) {
1771     PetscInt rstart;
1772     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1773     *d += rstart / A->rmap->bs;
1774   }
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1779 {
1780   PetscFunctionBegin;
1781   *a = ((Mat_MPISBAIJ *)A->data)->A;
1782   PetscFunctionReturn(PETSC_SUCCESS);
1783 }
1784 
1785 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1786 {
1787   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1788 
1789   PetscFunctionBegin;
1790   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1791   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1792   PetscFunctionReturn(PETSC_SUCCESS);
1793 }
1794 
1795 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1796 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1797 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1798 
1799 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1800                                        MatGetRow_MPISBAIJ,
1801                                        MatRestoreRow_MPISBAIJ,
1802                                        MatMult_MPISBAIJ,
1803                                        /*  4*/ MatMultAdd_MPISBAIJ,
1804                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1805                                        MatMultAdd_MPISBAIJ,
1806                                        NULL,
1807                                        NULL,
1808                                        NULL,
1809                                        /* 10*/ NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        MatSOR_MPISBAIJ,
1813                                        MatTranspose_MPISBAIJ,
1814                                        /* 15*/ MatGetInfo_MPISBAIJ,
1815                                        MatEqual_MPISBAIJ,
1816                                        MatGetDiagonal_MPISBAIJ,
1817                                        MatDiagonalScale_MPISBAIJ,
1818                                        MatNorm_MPISBAIJ,
1819                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1820                                        MatAssemblyEnd_MPISBAIJ,
1821                                        MatSetOption_MPISBAIJ,
1822                                        MatZeroEntries_MPISBAIJ,
1823                                        /* 24*/ NULL,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        /* 29*/ MatSetUp_MPI_Hash,
1829                                        NULL,
1830                                        NULL,
1831                                        MatGetDiagonalBlock_MPISBAIJ,
1832                                        NULL,
1833                                        /* 34*/ MatDuplicate_MPISBAIJ,
1834                                        NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        /* 39*/ MatAXPY_MPISBAIJ,
1839                                        MatCreateSubMatrices_MPISBAIJ,
1840                                        MatIncreaseOverlap_MPISBAIJ,
1841                                        MatGetValues_MPISBAIJ,
1842                                        MatCopy_MPISBAIJ,
1843                                        /* 44*/ NULL,
1844                                        MatScale_MPISBAIJ,
1845                                        MatShift_MPISBAIJ,
1846                                        NULL,
1847                                        NULL,
1848                                        /* 49*/ NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        /* 54*/ NULL,
1854                                        NULL,
1855                                        MatSetUnfactored_MPISBAIJ,
1856                                        NULL,
1857                                        MatSetValuesBlocked_MPISBAIJ,
1858                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1859                                        NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        /* 64*/ NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1869                                        NULL,
1870                                        MatConvert_MPISBAIJ_Basic,
1871                                        NULL,
1872                                        NULL,
1873                                        /* 74*/ NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        /* 79*/ NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        MatLoad_MPISBAIJ,
1883                                        /* 84*/ NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        /* 89*/ NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        /* 94*/ NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        /* 99*/ NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        MatConjugate_MPISBAIJ,
1902                                        NULL,
1903                                        /*104*/ NULL,
1904                                        MatRealPart_MPISBAIJ,
1905                                        MatImaginaryPart_MPISBAIJ,
1906                                        MatGetRowUpperTriangular_MPISBAIJ,
1907                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1908                                        /*109*/ NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        MatMissingDiagonal_MPISBAIJ,
1913                                        /*114*/ NULL,
1914                                        NULL,
1915                                        NULL,
1916                                        NULL,
1917                                        NULL,
1918                                        /*119*/ NULL,
1919                                        NULL,
1920                                        NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        /*124*/ NULL,
1924                                        NULL,
1925                                        NULL,
1926                                        NULL,
1927                                        NULL,
1928                                        /*129*/ NULL,
1929                                        NULL,
1930                                        NULL,
1931                                        NULL,
1932                                        NULL,
1933                                        /*134*/ NULL,
1934                                        NULL,
1935                                        NULL,
1936                                        NULL,
1937                                        NULL,
1938                                        /*139*/ MatSetBlockSizes_Default,
1939                                        NULL,
1940                                        NULL,
1941                                        NULL,
1942                                        NULL,
1943                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1944                                        NULL,
1945                                        NULL,
1946                                        NULL,
1947                                        NULL,
1948                                        NULL,
1949                                        /*150*/ NULL,
1950                                        MatEliminateZeros_MPISBAIJ,
1951                                        NULL,
1952                                        NULL,
1953                                        NULL,
1954                                        NULL};
1955 
1956 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1957 {
1958   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1959   PetscInt      i, mbs, Mbs;
1960   PetscMPIInt   size;
1961 
1962   PetscFunctionBegin;
1963   if (B->hash_active) {
1964     B->ops[0]      = b->cops;
1965     B->hash_active = PETSC_FALSE;
1966   }
1967   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1968   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1969   PetscCall(PetscLayoutSetUp(B->rmap));
1970   PetscCall(PetscLayoutSetUp(B->cmap));
1971   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1972   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1973   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1974 
1975   mbs = B->rmap->n / bs;
1976   Mbs = B->rmap->N / bs;
1977   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1978 
1979   B->rmap->bs = bs;
1980   b->bs2      = bs * bs;
1981   b->mbs      = mbs;
1982   b->Mbs      = Mbs;
1983   b->nbs      = B->cmap->n / bs;
1984   b->Nbs      = B->cmap->N / bs;
1985 
1986   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1987   b->rstartbs = B->rmap->rstart / bs;
1988   b->rendbs   = B->rmap->rend / bs;
1989 
1990   b->cstartbs = B->cmap->rstart / bs;
1991   b->cendbs   = B->cmap->rend / bs;
1992 
1993 #if defined(PETSC_USE_CTABLE)
1994   PetscCall(PetscHMapIDestroy(&b->colmap));
1995 #else
1996   PetscCall(PetscFree(b->colmap));
1997 #endif
1998   PetscCall(PetscFree(b->garray));
1999   PetscCall(VecDestroy(&b->lvec));
2000   PetscCall(VecScatterDestroy(&b->Mvctx));
2001   PetscCall(VecDestroy(&b->slvec0));
2002   PetscCall(VecDestroy(&b->slvec0b));
2003   PetscCall(VecDestroy(&b->slvec1));
2004   PetscCall(VecDestroy(&b->slvec1a));
2005   PetscCall(VecDestroy(&b->slvec1b));
2006   PetscCall(VecScatterDestroy(&b->sMvctx));
2007 
2008   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2009 
2010   MatSeqXAIJGetOptions_Private(b->B);
2011   PetscCall(MatDestroy(&b->B));
2012   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2013   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2014   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2015   MatSeqXAIJRestoreOptions_Private(b->B);
2016 
2017   MatSeqXAIJGetOptions_Private(b->A);
2018   PetscCall(MatDestroy(&b->A));
2019   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2020   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2021   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
2022   MatSeqXAIJRestoreOptions_Private(b->A);
2023 
2024   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2025   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2026 
2027   B->preallocated  = PETSC_TRUE;
2028   B->was_assembled = PETSC_FALSE;
2029   B->assembled     = PETSC_FALSE;
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2034 {
2035   PetscInt        m, rstart, cend;
2036   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2037   const PetscInt *JJ          = NULL;
2038   PetscScalar    *values      = NULL;
2039   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2040   PetscBool       nooffprocentries;
2041 
2042   PetscFunctionBegin;
2043   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2044   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2045   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2046   PetscCall(PetscLayoutSetUp(B->rmap));
2047   PetscCall(PetscLayoutSetUp(B->cmap));
2048   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2049   m      = B->rmap->n / bs;
2050   rstart = B->rmap->rstart / bs;
2051   cend   = B->cmap->rend / bs;
2052 
2053   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2054   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2055   for (i = 0; i < m; i++) {
2056     nz = ii[i + 1] - ii[i];
2057     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2058     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2059     JJ = jj + ii[i];
2060     bd = 0;
2061     for (j = 0; j < nz; j++) {
2062       if (*JJ >= i + rstart) break;
2063       JJ++;
2064       bd++;
2065     }
2066     d = 0;
2067     for (; j < nz; j++) {
2068       if (*JJ++ >= cend) break;
2069       d++;
2070     }
2071     d_nnz[i] = d;
2072     o_nnz[i] = nz - d - bd;
2073     nz       = nz - bd;
2074     nz_max   = PetscMax(nz_max, nz);
2075   }
2076   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2077   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2078   PetscCall(PetscFree2(d_nnz, o_nnz));
2079 
2080   values = (PetscScalar *)V;
2081   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2082   for (i = 0; i < m; i++) {
2083     PetscInt        row   = i + rstart;
2084     PetscInt        ncols = ii[i + 1] - ii[i];
2085     const PetscInt *icols = jj + ii[i];
2086     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2087       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2088       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2089     } else { /* block ordering does not match so we can only insert one block at a time. */
2090       PetscInt j;
2091       for (j = 0; j < ncols; j++) {
2092         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2093         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2094       }
2095     }
2096   }
2097 
2098   if (!V) PetscCall(PetscFree(values));
2099   nooffprocentries    = B->nooffprocentries;
2100   B->nooffprocentries = PETSC_TRUE;
2101   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2102   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2103   B->nooffprocentries = nooffprocentries;
2104 
2105   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2106   PetscFunctionReturn(PETSC_SUCCESS);
2107 }
2108 
2109 /*MC
2110    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2111    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2112    the matrix is stored.
2113 
2114    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2115    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2116 
2117    Options Database Key:
2118 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2119 
2120    Level: beginner
2121 
2122    Note:
2123      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2124      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
2125 
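   Example Usage:
   A minimal assembly sketch; the global size, block size, and preallocation counts below are illustrative only:
.vb
   Mat      A;
   PetscInt bs = 2, M = 100;

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, bs, 3, NULL, 1, NULL));
   ... insert block entries in the upper triangular part with MatSetValuesBlocked() ...
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve
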
2126 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2127 M*/
2128 
2129 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2130 {
2131   Mat_MPISBAIJ *b;
2132   PetscBool     flg = PETSC_FALSE;
2133 
2134   PetscFunctionBegin;
2135   PetscCall(PetscNew(&b));
2136   B->data   = (void *)b;
2137   B->ops[0] = MatOps_Values;
2138 
2139   B->ops->destroy = MatDestroy_MPISBAIJ;
2140   B->ops->view    = MatView_MPISBAIJ;
2141   B->assembled    = PETSC_FALSE;
2142   B->insertmode   = NOT_SET_VALUES;
2143 
2144   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2145   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2146 
2147   /* build local table of row and column ownerships */
2148   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2149 
2150   /* build cache for off array entries formed */
2151   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2152 
2153   b->donotstash  = PETSC_FALSE;
2154   b->colmap      = NULL;
2155   b->garray      = NULL;
2156   b->roworiented = PETSC_TRUE;
2157 
2158   /* stuff used in block assembly */
2159   b->barray = NULL;
2160 
2161   /* stuff used for matrix vector multiply */
2162   b->lvec    = NULL;
2163   b->Mvctx   = NULL;
2164   b->slvec0  = NULL;
2165   b->slvec0b = NULL;
2166   b->slvec1  = NULL;
2167   b->slvec1a = NULL;
2168   b->slvec1b = NULL;
2169   b->sMvctx  = NULL;
2170 
2171   /* stuff for MatGetRow() */
2172   b->rowindices   = NULL;
2173   b->rowvalues    = NULL;
2174   b->getrowactive = PETSC_FALSE;
2175 
2176   /* hash table stuff */
2177   b->ht           = NULL;
2178   b->hd           = NULL;
2179   b->ht_size      = 0;
2180   b->ht_flag      = PETSC_FALSE;
2181   b->ht_fact      = 0;
2182   b->ht_total_ct  = 0;
2183   b->ht_insert_ct = 0;
2184 
2185   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2186   b->ijonly = PETSC_FALSE;
2187 
2188   b->in_loc = NULL;
2189   b->v_loc  = NULL;
2190   b->n_loc  = 0;
2191 
2192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2196 #if defined(PETSC_HAVE_ELEMENTAL)
2197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2198 #endif
2199 #if defined(PETSC_HAVE_SCALAPACK)
2200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2201 #endif
2202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2204 
2205   B->symmetric                   = PETSC_BOOL3_TRUE;
2206   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2207   B->symmetry_eternal            = PETSC_TRUE;
2208   B->structural_symmetry_eternal = PETSC_TRUE;
2209 #if defined(PETSC_USE_COMPLEX)
2210   B->hermitian = PETSC_BOOL3_FALSE;
2211 #else
2212   B->hermitian = PETSC_BOOL3_TRUE;
2213 #endif
2214 
2215   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2216   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2217   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2218   if (flg) {
2219     PetscReal fact = 1.39;
2220     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2221     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2222     if (fact <= 1.0) fact = 1.39;
2223     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2224     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2225   }
2226   PetscOptionsEnd();
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2231 /*MC
2232    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2233 
2234    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2235    and `MATMPISBAIJ` otherwise.
2236 
2237    Options Database Key:
2238 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2239 
2240   Level: beginner
2241 
2242 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2243 M*/
2244 
2245 /*@
2246   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2247   the user should preallocate the matrix storage by setting the parameters
2248   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2249   performance can be increased by more than a factor of 50.
2250 
2251   Collective
2252 
2253   Input Parameters:
2254 + B     - the matrix
2255 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2256           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2257 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2258           submatrix  (same for all local rows)
2259 . d_nnz - array containing the number of block nonzeros in the various block rows
2260           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2261           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2262           for the diagonal entry and set a value even if it is zero.
2263 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2264           submatrix (same for all local rows).
2265 - o_nnz - array containing the number of nonzeros in the various block rows of the
2266           off-diagonal portion of the local submatrix that is right of the diagonal
2267           (possibly different for each block row) or `NULL`.
2268 
2269   Options Database Keys:
2270 + -mat_no_unroll  - uses code that does not unroll the loops in the
2271                     block calculations (much slower)
2272 - -mat_block_size - size of the blocks to use
2273 
2274   Level: intermediate
2275 
2276   Notes:
2277 
2278   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2279   then it must be used on all processors that share the object for that argument.
2280 
2281   If the *_nnz parameter is given then the *_nz parameter is ignored
2282 
2283   Storage Information:
2284   For a square global matrix we define each processor's diagonal portion
2285   to be its local rows and the corresponding columns (a square submatrix);
2286   each processor's off-diagonal portion encompasses the remainder of the
2287   local matrix (a rectangular submatrix).
2288 
2289   The user can specify preallocated storage for the diagonal part of
2290   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2291   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2292   memory allocation.  Likewise, specify preallocated storage for the
2293   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2294 
2295   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2296   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2297   You can also run with the option `-info` and look for messages with the string
2298   malloc in them to see if additional memory allocation was needed.
2299 
2300   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2301   the figure below we depict these three local rows and all columns (0-11).
2302 
2303 .vb
2304            0 1 2 3 4 5 6 7 8 9 10 11
2305           --------------------------
2306    row 3  |. . . d d d o o o o  o  o
2307    row 4  |. . . d d d o o o o  o  o
2308    row 5  |. . . d d d o o o o  o  o
2309           --------------------------
2310 .ve
2311 
2312   Thus, any entries in the d locations are stored in the d (diagonal)
2313   submatrix, and any entries in the o locations are stored in the
2314   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2315   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2316 
2317   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2318   plus the diagonal part of the d matrix,
2319   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2320 
2321   In general, for PDE problems in which most nonzeros are near the diagonal,
2322   one expects `d_nz` >> `o_nz`.
2323 
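  Example Usage:
  A minimal sketch, assuming `B` already has type `MATMPISBAIJ`, block size 2, and 3 block rows on this
  process; the counts are illustrative. Each `d_nnz` entry counts only the blocks on or to the right of
  the block diagonal in the diagonal portion:
.vb
   PetscInt d_nnz[3] = {2, 2, 1};
   PetscInt o_nnz[3] = {1, 0, 1};

   PetscCall(MatMPISBAIJSetPreallocation(B, 2, 0, d_nnz, 0, o_nnz));
.ve
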
2324 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2325 @*/
2326 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2327 {
2328   PetscFunctionBegin;
2329   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2330   PetscValidType(B, 1);
2331   PetscValidLogicalCollectiveInt(B, bs, 2);
2332   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2333   PetscFunctionReturn(PETSC_SUCCESS);
2334 }
2335 
2336 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2337 /*@
2338   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2339   (block compressed row).  For good matrix assembly performance
2340   the user should preallocate the matrix storage by setting the parameters
2341   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2342 
2343   Collective
2344 
2345   Input Parameters:
2346 + comm  - MPI communicator
2347 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2348           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2349 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
2350           This value should be the same as the local size used in creating the
2351           y vector for the matrix-vector product y = Ax.
2352 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
2353           This value should be the same as the local size used in creating the
2354           x vector for the matrix-vector product y = Ax.
2355 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2356 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2357 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2358           submatrix (same for all local rows)
2359 . d_nnz - array containing the number of block nonzeros in the various block rows
2360           in the upper triangular portion of the diagonal portion of the local submatrix
2361           (possibly different for each block row) or `NULL`.
2362           If you plan to factor the matrix you must leave room for the diagonal entry and
2363           set its value even if it is zero.
2364 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2365           submatrix (same for all local rows).
2366 - o_nnz - array containing the number of nonzeros in the various block rows of the
2367           off-diagonal portion of the local submatrix (possibly different for
2368           each block row) or `NULL`.
2369 
2370   Output Parameter:
2371 . A - the matrix
2372 
2373   Options Database Keys:
2374 + -mat_no_unroll  - uses code that does not unroll the loops in the
2375                     block calculations (much slower)
2376 . -mat_block_size - size of the blocks to use
2377 - -mat_mpi        - use the parallel matrix data structures even on one processor
2378                     (defaults to using SeqSBAIJ format on one processor)
2379 
2380   Level: intermediate
2381 
2382   Notes:
2383   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2384   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2385   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2386 
2387   The number of rows and columns must be divisible by blocksize.
2388   This matrix type does not support complex Hermitian operations.
2389 
2390   The user MUST specify either the local or global matrix dimensions
2391   (possibly both).
2392 
2393   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2394   then it must be used on all processors that share the object for that argument.
2395 
2396   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
2397   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2398 
2399   If the *_nnz parameter is given then the *_nz parameter is ignored
2400 
2401   Storage Information:
2402   For a square global matrix we define each processor's diagonal portion
2403   to be its local rows and the corresponding columns (a square submatrix);
2404   each processor's off-diagonal portion encompasses the remainder of the
2405   local matrix (a rectangular submatrix).
2406 
2407   The user can specify preallocated storage for the diagonal part of
2408   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2409   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2410   memory allocation. Likewise, specify preallocated storage for the
2411   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2412 
2413   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2414   the figure below we depict these three local rows and all columns (0-11).
2415 
2416 .vb
2417            0 1 2 3 4 5 6 7 8 9 10 11
2418           --------------------------
2419    row 3  |. . . d d d o o o o  o  o
2420    row 4  |. . . d d d o o o o  o  o
2421    row 5  |. . . d d d o o o o  o  o
2422           --------------------------
2423 .ve
2424 
2425   Thus, any entries in the d locations are stored in the d (diagonal)
2426   submatrix, and any entries in the o locations are stored in the
2427   o (off-diagonal) submatrix. Note that the d matrix is stored in
2428   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2429 
2430   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2431   plus the diagonal part of the d matrix,
2432   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2433   In general, for PDE problems in which most nonzeros are near the diagonal,
2434   one expects `d_nz` >> `o_nz`.
2435 
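  Example Usage:
  A minimal sketch creating a 100 x 100 matrix with 2 x 2 blocks; the preallocation counts (3 block
  nonzeros per block row in the diagonal portion, 1 in the off-diagonal portion) are illustrative only:
.vb
   Mat A;

   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 3, NULL, 1, NULL, &A));
.ve
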
2436 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2437           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2438 @*/
2439 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2440 {
2441   PetscMPIInt size;
2442 
2443   PetscFunctionBegin;
2444   PetscCall(MatCreate(comm, A));
2445   PetscCall(MatSetSizes(*A, m, n, M, N));
2446   PetscCallMPI(MPI_Comm_size(comm, &size));
2447   if (size > 1) {
2448     PetscCall(MatSetType(*A, MATMPISBAIJ));
2449     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2450   } else {
2451     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2452     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2453   }
2454   PetscFunctionReturn(PETSC_SUCCESS);
2455 }
2456 
2457 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2458 {
2459   Mat           mat;
2460   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2461   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2462   PetscScalar  *array;
2463 
2464   PetscFunctionBegin;
2465   *newmat = NULL;
2466 
2467   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2468   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2469   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2470   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2471   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2472 
2473   if (matin->hash_active) {
2474     PetscCall(MatSetUp(mat));
2475   } else {
2476     mat->factortype   = matin->factortype;
2477     mat->preallocated = PETSC_TRUE;
2478     mat->assembled    = PETSC_TRUE;
2479     mat->insertmode   = NOT_SET_VALUES;
2480 
2481     a      = (Mat_MPISBAIJ *)mat->data;
2482     a->bs2 = oldmat->bs2;
2483     a->mbs = oldmat->mbs;
2484     a->nbs = oldmat->nbs;
2485     a->Mbs = oldmat->Mbs;
2486     a->Nbs = oldmat->Nbs;
2487 
2488     a->size         = oldmat->size;
2489     a->rank         = oldmat->rank;
2490     a->donotstash   = oldmat->donotstash;
2491     a->roworiented  = oldmat->roworiented;
2492     a->rowindices   = NULL;
2493     a->rowvalues    = NULL;
2494     a->getrowactive = PETSC_FALSE;
2495     a->barray       = NULL;
2496     a->rstartbs     = oldmat->rstartbs;
2497     a->rendbs       = oldmat->rendbs;
2498     a->cstartbs     = oldmat->cstartbs;
2499     a->cendbs       = oldmat->cendbs;
2500 
2501     /* hash table stuff */
2502     a->ht           = NULL;
2503     a->hd           = NULL;
2504     a->ht_size      = 0;
2505     a->ht_flag      = oldmat->ht_flag;
2506     a->ht_fact      = oldmat->ht_fact;
2507     a->ht_total_ct  = 0;
2508     a->ht_insert_ct = 0;
2509 
2510     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2511     if (oldmat->colmap) {
2512 #if defined(PETSC_USE_CTABLE)
2513       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2514 #else
2515       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2516       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2517 #endif
2518     } else a->colmap = NULL;
2519 
2520     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2521       PetscCall(PetscMalloc1(len, &a->garray));
2522       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2523     } else a->garray = NULL;
2524 
2525     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2526     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2527     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2528 
2529     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2530     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2531 
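2531     /* slvec1a/slvec1b (and slvec0b) are views sharing the storage of slvec1 (resp. slvec0):
2531        the first bs*mbs entries form the local part, the remaining nt - bs*mbs entries the off-process part */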
2532     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2533     PetscCall(VecGetArray(a->slvec1, &array));
2534     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2535     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2536     PetscCall(VecRestoreArray(a->slvec1, &array));
2537     PetscCall(VecGetArray(a->slvec0, &array));
2538     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2539     PetscCall(VecRestoreArray(a->slvec0, &array));
2540 
2541     /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so instead share the scatter by taking a reference: */
2542     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2543     a->sMvctx = oldmat->sMvctx;
2544 
2545     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2546     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2547   }
2548   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2549   *newmat = mat;
2550   PetscFunctionReturn(PETSC_SUCCESS);
2551 }
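
/*
   Usage sketch (illustrative only, not part of this file's API): the duplication routine above is
   reached through the public MatDuplicate() interface, e.g.

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
     ...
     PetscCall(MatDestroy(&B));

   MAT_COPY_VALUES copies both the nonzero structure and the numerical values (via the
   MatDuplicate() calls on the diagonal and off-diagonal blocks above), while
   MAT_DO_NOT_COPY_VALUES copies only the structure. Note that the sMvctx scatter is shared by
   reference rather than deep-copied.
*/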
2552 
2553 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2554 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2555 
2556 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2557 {
2558   PetscBool isbinary;
2559 
2560   PetscFunctionBegin;
2561   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2562   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2563   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2564   PetscFunctionReturn(PETSC_SUCCESS);
2565 }
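
/*
   Usage sketch (illustrative only): MatLoad_MPISBAIJ() accepts only binary viewers, so a typical
   call sequence is

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPISBAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));

   where "matrix.dat" is a placeholder name for a file previously written with MatView() on a
   binary viewer.
*/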
2566 
2567 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2568 {
2569   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2570   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2571   PetscReal     atmp;
2572   PetscReal    *work, *svalues, *rvalues;
2573   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2574   PetscMPIInt   rank, size;
2575   PetscInt     *rowners_bs, count, source;
2576   PetscScalar  *va;
2577   MatScalar    *ba;
2578   MPI_Status    stat;
2579 
2580   PetscFunctionBegin;
2581   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning row indices (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
2582   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2583   PetscCall(VecGetArray(v, &va));
2584 
2585   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2586   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2587 
2588   bs  = A->rmap->bs;
2589   mbs = a->mbs;
2590   Mbs = a->Mbs;
2591   ba  = b->a;
2592   bi  = b->i;
2593   bj  = b->j;
2594 
2595   /* find ownerships */
2596   rowners_bs = A->rmap->range;
2597 
2598   /* each proc creates an array to be distributed */
2599   PetscCall(PetscCalloc1(bs * Mbs, &work));
2600 
2601   /* row_max for B */
2602   if (rank != size - 1) {
2603     for (i = 0; i < mbs; i++) {
2604       ncols = bi[1] - bi[0];
2605       bi++;
2606       brow = bs * i;
2607       for (j = 0; j < ncols; j++) {
2608         bcol = bs * (*bj);
2609         for (kcol = 0; kcol < bs; kcol++) {
2610           col = bcol + kcol;           /* local col index */
2611           col += rowners_bs[rank + 1]; /* global col index */
2612           for (krow = 0; krow < bs; krow++) {
2613             atmp = PetscAbsScalar(*ba);
2614             ba++;
2615             row = brow + krow; /* local row index */
2616             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2617             if (work[col] < atmp) work[col] = atmp;
2618           }
2619         }
2620         bj++;
2621       }
2622     }
2623 
2624     /* send values to its owners */
2625     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2626       svalues = work + rowners_bs[dest];
2627       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2628       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2629     }
2630   }
2631 
2632   /* receive values */
2633   if (rank) {
2634     rvalues = work;
2635     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2636     for (source = 0; source < rank; source++) {
2637       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2638       /* process values */
2639       for (i = 0; i < count; i++) {
2640         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2641       }
2642     }
2643   }
2644 
2645   PetscCall(VecRestoreArray(v, &va));
2646   PetscCall(PetscFree(work));
2647   PetscFunctionReturn(PETSC_SUCCESS);
2648 }
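
/*
   Usage sketch (illustrative only): since the routine above errors if idx is non-NULL, callers may
   request only the values, e.g.

     Vec v;
     PetscCall(MatCreateVecs(A, NULL, &v));   the left vector has the row layout of A
     PetscCall(MatGetRowMaxAbs(A, v, NULL));  v[i] = max_j |A(i,j)|, with the lower triangle
                                              recovered from the stored upper triangle
     PetscCall(VecDestroy(&v));
*/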
2649 
2650 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2651 {
2652   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2653   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2654   PetscScalar       *x, *ptr, *from;
2655   Vec                bb1;
2656   const PetscScalar *b;
2657 
2658   PetscFunctionBegin;
2659   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2660   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2661 
2662   if (flag == SOR_APPLY_UPPER) {
2663     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2664     PetscFunctionReturn(PETSC_SUCCESS);
2665   }
2666 
2667   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2668     if (flag & SOR_ZERO_INITIAL_GUESS) {
2669       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2670       its--;
2671     }
2672 
2673     PetscCall(VecDuplicate(bb, &bb1));
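    /* Each pass below assembles bb1 = bb - (contributions of B and B^T for the current xx), using
       the slvec0/slvec1 work vectors and the sMvctx scatter, and then runs a purely local
       symmetric sweep on the diagonal block. */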
2674     while (its--) {
2675       /* lower triangular part: slvec0b = - B^T*xx */
2676       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2677 
2678       /* copy xx into slvec0a */
2679       PetscCall(VecGetArray(mat->slvec0, &ptr));
2680       PetscCall(VecGetArray(xx, &x));
2681       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2682       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2683 
2684       PetscCall(VecScale(mat->slvec0, -1.0));
2685 
2686       /* copy bb into slvec1a */
2687       PetscCall(VecGetArray(mat->slvec1, &ptr));
2688       PetscCall(VecGetArrayRead(bb, &b));
2689       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2690       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2691 
2692       /* set slvec1b = 0 */
2693       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2694       PetscCall(VecZeroEntries(mat->slvec1b));
2695 
2696       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2697       PetscCall(VecRestoreArray(xx, &x));
2698       PetscCall(VecRestoreArrayRead(bb, &b));
2699       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2700 
2701       /* upper triangular part: bb1 = bb1 - B*x */
2702       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2703 
2704       /* local diagonal sweep */
2705       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2706     }
2707     PetscCall(VecDestroy(&bb1));
2708   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2709     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2710   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2711     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2712   } else if (flag & SOR_EISENSTAT) {
2713     Vec                xx1;
2714     PetscBool          hasop;
2715     const PetscScalar *diag;
2716     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2717     PetscInt           i, n;
2718 
2719     if (!mat->xx1) {
2720       PetscCall(VecDuplicate(bb, &mat->xx1));
2721       PetscCall(VecDuplicate(bb, &mat->bb1));
2722     }
2723     xx1 = mat->xx1;
2724     bb1 = mat->bb1;
2725 
2726     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2727 
2728     if (!mat->diag) {
2729       /* caching the diagonal here is wrong if the same matrix is later given new numerical values */
2730       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2731       PetscCall(MatGetDiagonal(matin, mat->diag));
2732     }
2733     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2734 
2735     if (hasop) {
2736       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2737       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2738     } else {
2739       /*
2740           These two lines are replaced by code that may be a bit faster for a good compiler
2741       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2742       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2743       */
2744       PetscCall(VecGetArray(mat->slvec1a, &sl));
2745       PetscCall(VecGetArrayRead(mat->diag, &diag));
2746       PetscCall(VecGetArrayRead(bb, &b));
2747       PetscCall(VecGetArray(xx, &x));
2748       PetscCall(VecGetLocalSize(xx, &n));
2749       if (omega == 1.0) {
2750         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2751         PetscCall(PetscLogFlops(2.0 * n));
2752       } else {
2753         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2754         PetscCall(PetscLogFlops(3.0 * n));
2755       }
2756       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2757       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2758       PetscCall(VecRestoreArrayRead(bb, &b));
2759       PetscCall(VecRestoreArray(xx, &x));
2760     }
2761 
2762     /* multiply off-diagonal portion of matrix */
2763     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2764     PetscCall(VecZeroEntries(mat->slvec1b));
2765     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2766     PetscCall(VecGetArray(mat->slvec0, &from));
2767     PetscCall(VecGetArray(xx, &x));
2768     PetscCall(PetscArraycpy(from, x, bs * mbs));
2769     PetscCall(VecRestoreArray(mat->slvec0, &from));
2770     PetscCall(VecRestoreArray(xx, &x));
2771     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2772     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2773     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2774 
2775     /* local sweep */
2776     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2777     PetscCall(VecAXPY(xx, 1.0, xx1));
2778   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2779   PetscFunctionReturn(PETSC_SUCCESS);
2780 }
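
/*
   Usage sketch (illustrative only): the SOR kernel above is normally reached through PCSOR with
   block size 1, e.g.

     -ksp_type richardson -pc_type sor -pc_sor_symmetric

   or programmatically

     PC pc;
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSOR));
     PetscCall(PCSORSetSymmetric(pc, SOR_LOCAL_SYMMETRIC_SWEEP));

   The SOR_EISENSTAT branch (note scale = (omega - 2.0)/omega) corresponds to -pc_type eisenstat.
*/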
2781 
2782 /*@
2783   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2784 
2785   Collective
2786 
2787   Input Parameters:
2788 + comm - MPI communicator
2789 . bs   - the block size (only a block size of 1 is supported)
2790 . m    - number of local rows (cannot be `PETSC_DECIDE`)
2791 . n    - number of local columns; this value should be the same as the local size used in creating the
2792          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
2793          calculated if `N` is given). For square matrices `n` is almost always `m`.
2794 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2795 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2796 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row `row-1` of the matrix
2797 . j    - column indices
2798 - a    - matrix values
2799 
2800   Output Parameter:
2801 . mat - the matrix
2802 
2803   Level: intermediate
2804 
2805   Notes:
2806   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2807   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2808   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2809 
2810   The `i` and `j` indices are 0-based, and the entries of `i` are offsets into the local `j` array.
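
  Example:
  An illustrative sketch (not taken from the PETSc examples), with bs = 1 and two local rows per
  process; rank is assumed to be this process's rank in comm, and only upper-triangular entries
  are passed.
.vb
  PetscInt    rstart = 2 * rank;
  PetscInt    i[3]   = {0, 2, 3};
  PetscInt    j[3]   = {rstart, rstart + 1, rstart + 1};
  PetscScalar a[3]   = {2.0, -1.0, 2.0};
  Mat         A;

  PetscCall(MatCreateMPISBAIJWithArrays(comm, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve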
2811 
2812 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2813           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2814 @*/
2815 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2816 {
2817   PetscFunctionBegin;
2818   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2819   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2820   PetscCall(MatCreate(comm, mat));
2821   PetscCall(MatSetSizes(*mat, m, n, M, N));
2822   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2823   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2824   PetscFunctionReturn(PETSC_SUCCESS);
2825 }
2826 
2827 /*@
2828   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2829 
2830   Collective
2831 
2832   Input Parameters:
2833 + B  - the matrix
2834 . bs - the block size
2835 . i  - the indices into `j` for the start of each local row (indices start with zero)
2836 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2837 - v  - optional values in the matrix, pass `NULL` if not provided
2838 
2839   Level: advanced
2840 
2841   Notes:
2842   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2843   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2844   called this routine.
2845 
2846   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2847   and usually the numerical values as well.
2848 
2849   Any entries passed in that are below the diagonal are ignored
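
  Example:
  An illustrative sketch (not taken from the PETSc examples) with bs = 1 and one local row per
  process at global row r = rank; each row supplies its diagonal entry and one superdiagonal
  entry (the last process would pass only the diagonal).
.vb
  PetscInt    i[2] = {0, 2};
  PetscInt    j[2] = {r, r + 1};
  PetscScalar v[2] = {2.0, -1.0};

  PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve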
2850 
2851 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2852           `MatCreateMPISBAIJWithArrays()`
2853 @*/
2854 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2855 {
2856   PetscFunctionBegin;
2857   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2858   PetscFunctionReturn(PETSC_SUCCESS);
2859 }
2860 
2861 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2862 {
2863   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2864   PetscInt    *indx;
2865   PetscScalar *values;
2866 
2867   PetscFunctionBegin;
2868   PetscCall(MatGetSize(inmat, &m, &N));
2869   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2870     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2871     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2872     PetscInt     *bindx, rmax = a->rmax, j;
2873     PetscMPIInt   rank, size;
2874 
2875     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2876     mbs = m / bs;
2877     Nbs = N / cbs;
2878     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2879     nbs = n / cbs;
2880 
2881     PetscCall(PetscMalloc1(rmax, &bindx));
2882     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* a macro paired with MatPreallocateEnd(); its outputs __end and __rstart are used below */
2883 
2884     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2885     PetscCallMPI(MPI_Comm_size(comm, &size));
2886     if (rank == size - 1) {
2887       /* Check sum(nbs) = Nbs */
2888       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2889     }
2890 
2891     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2892     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2893     for (i = 0; i < mbs; i++) {
2894       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2895       nnz = nnz / bs;
2896       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2897       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2898       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2899     }
2900     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2901     PetscCall(PetscFree(bindx));
2902 
2903     PetscCall(MatCreate(comm, outmat));
2904     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2905     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2906     PetscCall(MatSetType(*outmat, MATSBAIJ));
2907     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2908     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2909     MatPreallocateEnd(dnz, onz);
2910   }
2911 
2912   /* numeric phase */
2913   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2914   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2915 
2916   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2917   for (i = 0; i < m; i++) {
2918     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2919     Ii = i + rstart;
2920     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2921     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2922   }
2923   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2924   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2925   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
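
/*
   Usage sketch (illustrative only): the routine above is reached through the public
   MatCreateMPIMatConcatenateSeqMat() interface, which stacks each process's sequential SBAIJ
   matrix into one parallel matrix, e.g.

     Mat C;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seqsbaij, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));

   where seqsbaij is this process's MATSEQSBAIJ matrix and PETSC_DECIDE lets PETSc choose the
   local column size n.
*/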
2928