
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) {
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}
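
/*
   Usage sketch of the MATPREALLOCATOR pattern employed above (a minimal standalone
   example under assumed sizes m, n, M, N; it is not part of this file's API): the
   nonzero pattern is inserted twice, once into the preallocator and once into the
   real matrix, so the second pass triggers no mallocs.

     Mat pre, C;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &pre));
     PetscCall(MatSetSizes(pre, m, n, M, N));
     PetscCall(MatSetType(pre, MATPREALLOCATOR));
     PetscCall(MatSetUp(pre));
     .. MatSetValues(pre, ...) with the full nonzero pattern ..
     PetscCall(MatAssemblyBegin(pre, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(pre, MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
     PetscCall(MatSetSizes(C, m, n, M, N));
     PetscCall(MatSetType(C, MATAIJ));
     PetscCall(MatPreallocatorPreallocate(pre, PETSC_TRUE, C));
     PetscCall(MatDestroy(&pre));
     .. MatSetValues(C, ...) with the same pattern, then assemble C ..
*/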

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
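
/*
   Example use of the conversion above (a sketch; A is assumed to be an assembled
   MATMPISBAIJ matrix):

     Mat Aaij;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &Aaij));  .. create a new matrix ..
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_REUSE_MATRIX, &Aaij));    .. zero and refill Aaij ..
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INPLACE_MATRIX, &A));     .. replace A itself ..

   With MAT_REUSE_MATRIX the target is zeroed and refilled; with MAT_INPLACE_MATRIX
   the header of A is replaced by the converted matrix (MatHeaderReplace() above).
*/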

PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

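/*
   Usage sketch for the two hooks above (requires that the nonzero pattern be
   frozen first, e.g. via MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE)):

     PetscCall(MatStoreValues(mat));     .. snapshot the current numerical values ..
     .. overwrite values, e.g. apply a diagonal shift, factor, solve ..
     PetscCall(MatRetrieveValues(mat));  .. restore the snapshot ..

   Both hooks simply forward to the sequential diagonal (A) and off-diagonal (B)
   blocks held by the parallel matrix.
*/
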
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

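/*
   Both macros above locate block column bcol in block row brow by a bounded binary
   search followed by a linear scan, then address the scalar (ridx, cidx) inside
   block _i as

     ap[bs2 * _i + bs * cidx + ridx]

   i.e. each bs x bs block is stored contiguously in column-major order. A worked
   example: with bs = 2 (so bs2 = 4), the scalar at local position (row 1, col 0)
   of the third block (_i = 2) of a block row lives at offset 4*2 + 2*0 + 1 = 9
   from the start of that block row's values.
*/
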
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required by the macros */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
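
/*
   Caller-side sketch (hypothetical indices i <= j): only entries whose block row
   is at most the block column may be passed to an SBAIJ matrix.

     PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));  .. optional ..
     PetscCall(MatSetValue(mat, i, i, dval, INSERT_VALUES));                 .. diagonal entry ..
     PetscCall(MatSetValue(mat, i, j, oval, ADD_VALUES));                    .. upper triangle, i <= j ..
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));

   Rows owned by other processes are stashed above and shipped during assembly.
*/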

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) {
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
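
/*
   Caller-side sketch for the blocked interface (a hypothetical 2x2 block inserted
   at block position (bi, bj) with bi <= bj; values are one bs*bs block,
   row-oriented by default):

     PetscScalar blk[4] = {a00, a01, a10, a11};
     PetscCall(MatSetValuesBlocked(mat, 1, &bi, 1, &bj, blk, INSERT_VALUES));

   The indices are block indices (global scalar row = bs*bi + r); lower triangular
   blocks (bi > bj) are ignored or rejected exactly as in MatSetValues_MPISBAIJ().
*/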

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2); /* square of the Frobenius norm of A */
      lnorm2++;
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2); /* square of the Frobenius norm of B */
      lnorm2--;
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
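
/*
   Sketch of the Frobenius reduction above: each process stores its diagonal part A
   once and its off-diagonal part B once, but every B entry also represents the
   transposed entry implied by symmetry, so with sum[0] = sum_p ||A_p||_F^2 and
   sum[1] = sum_p ||B_p||_F^2 the global result is

     ||M||_F = sqrt(sum[0] + 2*sum[1])

   which is exactly *norm = PetscSqrtReal(sum[0] + 2 * sum[1]). For NORM_1 and
   NORM_INFINITY the row and column sums coincide by symmetry, which is why one
   array of column sums (rsum) suffices for both.
*/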

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
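
/*
   The assembly above is driven by the usual two-phase protocol (user-side sketch):

     PetscCall(MatSetValues(mat, 1, &i, 1, &j, &val, ADD_VALUES));  .. possibly off-process ..
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));          .. ships the stashes ..
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));            .. drains them locally ..

   MatAssemblyEnd_MPISBAIJ() empties both the scalar and the block stash through
   MatSetValues_MPISBAIJ()/MatSetValuesBlocked_MPISBAIJ(), assembles the A and B
   parts, and on the first final assembly builds the scatter contexts via
   MatSetUpMultiply_MPISBAIJ().
*/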

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

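/*
   A sketch of what the multiply kernels above compute. With the symmetric storage
   M = D + U + U^T (D + U kept in a->A, the strictly off-diagonal block U in a->B,
   and B^H in place of B^T for the Hermitian variant):

     y = (D + U) x_local + U^T x_local (scattered to the owning processes) + U x_ghost

   slvec0 = [x_local | B^T x_local] is scattered with ADD_VALUES into
   slvec1 = [slvec1a | slvec1b], so one VecScatter simultaneously accumulates the
   remote transpose contributions into slvec1a and gathers the ghost values of x
   into slvec1b; the final multadd with a->B then adds U x_ghost.
*/
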
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero       = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero       = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

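/*
   Caller-side sketch (rows must be locally owned, and SBAIJ additionally requires
   bracketing with the upper triangular accessors):

     PetscInt           ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetRowUpperTriangular(mat));
     PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
     .. use cols[0 .. ncols-1] and vals[0 .. ncols-1] ..
     PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));
     PetscCall(MatRestoreRowUpperTriangular(mat));

   Only one row may be outstanding at a time (see the getrowactive check above).
*/
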
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) {
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) {
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

1347 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
1348   IS        iscol_local;
1349   PetscInt  csize;
1350   PetscBool isequal;
1351 
1352   PetscFunctionBegin;
1353   PetscCall(ISGetLocalSize(iscol, &csize));
1354   if (call == MAT_REUSE_MATRIX) {
1355     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1356     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1357   } else {
1358     PetscBool issorted;
1359 
1360     PetscCall(ISAllGather(iscol, &iscol_local));
1361     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1362     PetscCall(ISSorted(iscol_local, &issorted));
1363     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1364   }
1365 
1366   /* now call MatCreateSubMatrix_MPIBAIJ() */
1367   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1368   if (call == MAT_INITIAL_MATRIX) {
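    /* stash the gathered column IS on the submatrix so a later MAT_REUSE_MATRIX call can retrieve it;
       the compose takes a reference, so the local reference is dropped right after */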
1369     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1370     PetscCall(ISDestroy(&iscol_local));
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) {
1376   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1377 
1378   PetscFunctionBegin;
1379   PetscCall(MatZeroEntries(l->A));
1380   PetscCall(MatZeroEntries(l->B));
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1385   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1386   Mat            A = a->A, B = a->B;
1387   PetscLogDouble isend[5], irecv[5];
1388 
1389   PetscFunctionBegin;
1390   info->block_size = (PetscReal)matin->rmap->bs;
1391 
1392   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1393 
1394   isend[0] = info->nz_used;
1395   isend[1] = info->nz_allocated;
1396   isend[2] = info->nz_unneeded;
1397   isend[3] = info->memory;
1398   isend[4] = info->mallocs;
1399 
1400   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1401 
1402   isend[0] += info->nz_used;
1403   isend[1] += info->nz_allocated;
1404   isend[2] += info->nz_unneeded;
1405   isend[3] += info->memory;
1406   isend[4] += info->mallocs;
1407   if (flag == MAT_LOCAL) {
1408     info->nz_used      = isend[0];
1409     info->nz_allocated = isend[1];
1410     info->nz_unneeded  = isend[2];
1411     info->memory       = isend[3];
1412     info->mallocs      = isend[4];
1413   } else if (flag == MAT_GLOBAL_MAX) {
1414     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1415 
1416     info->nz_used      = irecv[0];
1417     info->nz_allocated = irecv[1];
1418     info->nz_unneeded  = irecv[2];
1419     info->memory       = irecv[3];
1420     info->mallocs      = irecv[4];
1421   } else if (flag == MAT_GLOBAL_SUM) {
1422     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1423 
1424     info->nz_used      = irecv[0];
1425     info->nz_allocated = irecv[1];
1426     info->nz_unneeded  = irecv[2];
1427     info->memory       = irecv[3];
1428     info->mallocs      = irecv[4];
1429   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1430   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1431   info->fill_ratio_needed = 0;
1432   info->factor_mallocs    = 0;
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) {
1437   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1438   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1439 
1440   PetscFunctionBegin;
1441   switch (op) {
1442   case MAT_NEW_NONZERO_LOCATIONS:
1443   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1444   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1445   case MAT_KEEP_NONZERO_PATTERN:
1446   case MAT_SUBMAT_SINGLEIS:
1447   case MAT_NEW_NONZERO_LOCATION_ERR:
1448     MatCheckPreallocated(A, 1);
1449     PetscCall(MatSetOption(a->A, op, flg));
1450     PetscCall(MatSetOption(a->B, op, flg));
1451     break;
1452   case MAT_ROW_ORIENTED:
1453     MatCheckPreallocated(A, 1);
1454     a->roworiented = flg;
1455 
1456     PetscCall(MatSetOption(a->A, op, flg));
1457     PetscCall(MatSetOption(a->B, op, flg));
1458     break;
1459   case MAT_FORCE_DIAGONAL_ENTRIES:
1460   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
1461   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
1462   case MAT_USE_HASH_TABLE: a->ht_flag = flg; break;
1463   case MAT_HERMITIAN: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg));
1464 #if defined(PETSC_USE_COMPLEX)
1465     if (flg) { /* need different mat-vec ops */
1466       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1467       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1468       A->ops->multtranspose    = NULL;
1469       A->ops->multtransposeadd = NULL;
1470       A->symmetric             = PETSC_BOOL3_FALSE;
1471     }
1472 #endif
1473     break;
1474   case MAT_SPD:
1475   case MAT_SYMMETRIC: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg));
1476 #if defined(PETSC_USE_COMPLEX)
1477     if (flg) { /* restore to use default mat-vec ops */
1478       A->ops->mult             = MatMult_MPISBAIJ;
1479       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1480       A->ops->multtranspose    = MatMult_MPISBAIJ;
1481       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1482     }
1483 #endif
1484     break;
1485   case MAT_STRUCTURALLY_SYMMETRIC:
1486     MatCheckPreallocated(A, 1);
1487     PetscCall(MatSetOption(a->A, op, flg));
1488     break;
1489   case MAT_SYMMETRY_ETERNAL:
1490   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1491     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1492     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1493     break;
1494   case MAT_SPD_ETERNAL: break;
1495   case MAT_IGNORE_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
1496   case MAT_ERROR_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
1497   case MAT_GETROW_UPPERTRIANGULAR: aA->getrow_utriangular = flg; break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", (int)op);
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) {
1504   PetscFunctionBegin;
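  /* an SBAIJ matrix equals its transpose (it is symmetric, not Hermitian, even in complex),
     so transposing reduces to duplicating or copying A */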
1505   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1506   if (reuse == MAT_INITIAL_MATRIX) {
1507     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1508   } else if (reuse == MAT_REUSE_MATRIX) {
1509     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) {
1515   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1516   Mat           a = baij->A, b = baij->B;
1517   PetscInt      nv, m, n;
1518   PetscBool     flg;
1519 
1520   PetscFunctionBegin;
1521   if (ll != rr) {
1522     PetscCall(VecEqual(ll, rr, &flg));
1523     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1524   }
1525   if (!ll) PetscFunctionReturn(0);
1526 
1527   PetscCall(MatGetLocalSize(mat, &m, &n));
1528   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1529 
1530   PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vectors have non-conforming local size");
1532 
1533   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1534 
1535   /* left diagonalscale the off-diagonal part */
1536   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1537 
1538   /* scale the diagonal part */
1539   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1540 
1541   /* right diagonalscale the off-diagonal part */
1542   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1543   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) {
1548   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1549 
1550   PetscFunctionBegin;
1551   PetscCall(MatSetUnfactored(a->A));
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1556 
1557 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) {
1558   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1559   Mat           a, b, c, d;
1560   PetscBool     flg;
1561 
1562   PetscFunctionBegin;
1563   a = matA->A;
1564   b = matA->B;
1565   c = matB->A;
1566   d = matB->B;
1567 
1568   PetscCall(MatEqual(a, c, &flg));
1569   if (flg) PetscCall(MatEqual(b, d, &flg));
1570   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) {
1575   PetscBool isbaij;
1576 
1577   PetscFunctionBegin;
1578   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1579   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1580   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1581   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1582     PetscCall(MatGetRowUpperTriangular(A));
1583     PetscCall(MatCopy_Basic(A, B, str));
1584     PetscCall(MatRestoreRowUpperTriangular(A));
1585   } else {
1586     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1587     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1588 
1589     PetscCall(MatCopy(a->A, b->A, str));
1590     PetscCall(MatCopy(a->B, b->B, str));
1591   }
1592   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) {
1597   PetscFunctionBegin;
1598   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
1603   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1604   PetscBLASInt  bnz, one                          = 1;
1605   Mat_SeqSBAIJ *xa, *ya;
1606   Mat_SeqBAIJ  *xb, *yb;
1607 
1608   PetscFunctionBegin;
1609   if (str == SAME_NONZERO_PATTERN) {
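    /* identical nonzero patterns: Y += a*X is a raw axpy on the stored values of the
       diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks */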
1610     PetscScalar alpha = a;
1611     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1612     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1613     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1614     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1615     xb = (Mat_SeqBAIJ *)xx->B->data;
1616     yb = (Mat_SeqBAIJ *)yy->B->data;
1617     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1618     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1619     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1620   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1621     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1622     PetscCall(MatAXPY_Basic(Y, a, X, str));
1623     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1624   } else {
1625     Mat       B;
1626     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1627     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1628     PetscCall(MatGetRowUpperTriangular(X));
1629     PetscCall(MatGetRowUpperTriangular(Y));
1630     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1631     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1632     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1633     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1634     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1635     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1636     PetscCall(MatSetType(B, MATMPISBAIJ));
1637     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1638     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1639     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1640     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1641     PetscCall(MatHeaderMerge(Y, &B));
1642     PetscCall(PetscFree(nnz_d));
1643     PetscCall(PetscFree(nnz_o));
1644     PetscCall(MatRestoreRowUpperTriangular(X));
1645     PetscCall(MatRestoreRowUpperTriangular(Y));
1646   }
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) {
1651   PetscInt  i;
1652   PetscBool flg;
1653 
1654   PetscFunctionBegin;
1655   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1656   for (i = 0; i < n; i++) {
1657     PetscCall(ISEqual(irow[i], icol[i], &flg));
1658     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1659   }
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) {
1664   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1665   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1666 
1667   PetscFunctionBegin;
1668   if (!Y->preallocated) {
1669     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1670   } else if (!aij->nz) {
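    /* preallocated but no blocks stored yet: make room for one diagonal block per block row,
       preserving the user's nonew setting */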
1671     PetscInt nonew = aij->nonew;
1672     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1673     aij->nonew = nonew;
1674   }
1675   PetscCall(MatShift_Basic(Y, a));
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
1680   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1681 
1682   PetscFunctionBegin;
1683   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1684   PetscCall(MatMissingDiagonal(a->A, missing, d));
1685   if (d) {
1686     PetscInt rstart;
1687     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1688     *d += rstart / A->rmap->bs;
1689   }
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) {
1694   PetscFunctionBegin;
1695   *a = ((Mat_MPISBAIJ *)A->data)->A;
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 /* -------------------------------------------------------------------*/
1700 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1701                                        MatGetRow_MPISBAIJ,
1702                                        MatRestoreRow_MPISBAIJ,
1703                                        MatMult_MPISBAIJ,
1704                                        /*  4*/ MatMultAdd_MPISBAIJ,
1705                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1706                                        MatMultAdd_MPISBAIJ,
1707                                        NULL,
1708                                        NULL,
1709                                        NULL,
1710                                        /* 10*/ NULL,
1711                                        NULL,
1712                                        NULL,
1713                                        MatSOR_MPISBAIJ,
1714                                        MatTranspose_MPISBAIJ,
1715                                        /* 15*/ MatGetInfo_MPISBAIJ,
1716                                        MatEqual_MPISBAIJ,
1717                                        MatGetDiagonal_MPISBAIJ,
1718                                        MatDiagonalScale_MPISBAIJ,
1719                                        MatNorm_MPISBAIJ,
1720                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1721                                        MatAssemblyEnd_MPISBAIJ,
1722                                        MatSetOption_MPISBAIJ,
1723                                        MatZeroEntries_MPISBAIJ,
1724                                        /* 24*/ NULL,
1725                                        NULL,
1726                                        NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        /* 29*/ MatSetUp_MPISBAIJ,
1730                                        NULL,
1731                                        NULL,
1732                                        MatGetDiagonalBlock_MPISBAIJ,
1733                                        NULL,
1734                                        /* 34*/ MatDuplicate_MPISBAIJ,
1735                                        NULL,
1736                                        NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        /* 39*/ MatAXPY_MPISBAIJ,
1740                                        MatCreateSubMatrices_MPISBAIJ,
1741                                        MatIncreaseOverlap_MPISBAIJ,
1742                                        MatGetValues_MPISBAIJ,
1743                                        MatCopy_MPISBAIJ,
1744                                        /* 44*/ NULL,
1745                                        MatScale_MPISBAIJ,
1746                                        MatShift_MPISBAIJ,
1747                                        NULL,
1748                                        NULL,
1749                                        /* 49*/ NULL,
1750                                        NULL,
1751                                        NULL,
1752                                        NULL,
1753                                        NULL,
1754                                        /* 54*/ NULL,
1755                                        NULL,
1756                                        MatSetUnfactored_MPISBAIJ,
1757                                        NULL,
1758                                        MatSetValuesBlocked_MPISBAIJ,
1759                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1760                                        NULL,
1761                                        NULL,
1762                                        NULL,
1763                                        NULL,
1764                                        /* 64*/ NULL,
1765                                        NULL,
1766                                        NULL,
1767                                        NULL,
1768                                        NULL,
1769                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1770                                        NULL,
1771                                        MatConvert_MPISBAIJ_Basic,
1772                                        NULL,
1773                                        NULL,
1774                                        /* 74*/ NULL,
1775                                        NULL,
1776                                        NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        /* 79*/ NULL,
1780                                        NULL,
1781                                        NULL,
1782                                        NULL,
1783                                        MatLoad_MPISBAIJ,
1784                                        /* 84*/ NULL,
1785                                        NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        /* 89*/ NULL,
1790                                        NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        /* 94*/ NULL,
1795                                        NULL,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        /* 99*/ NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        MatConjugate_MPISBAIJ,
1803                                        NULL,
1804                                        /*104*/ NULL,
1805                                        MatRealPart_MPISBAIJ,
1806                                        MatImaginaryPart_MPISBAIJ,
1807                                        MatGetRowUpperTriangular_MPISBAIJ,
1808                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1809                                        /*109*/ NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        MatMissingDiagonal_MPISBAIJ,
1814                                        /*114*/ NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                        NULL,
1819                                        /*119*/ NULL,
1820                                        NULL,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        /*124*/ NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        /*129*/ NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        NULL,
1834                                        /*134*/ NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        /*139*/ MatSetBlockSizes_Default,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1845                                        NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        /*150*/ NULL};
1851 
1852 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
1853   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1854   PetscInt      i, mbs, Mbs;
1855   PetscMPIInt   size;
1856 
1857   PetscFunctionBegin;
1858   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1859   PetscCall(PetscLayoutSetUp(B->rmap));
1860   PetscCall(PetscLayoutSetUp(B->cmap));
1861   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1862   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1863   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1864 
1865   mbs = B->rmap->n / bs;
1866   Mbs = B->rmap->N / bs;
  PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1868 
1869   B->rmap->bs = bs;
1870   b->bs2      = bs * bs;
1871   b->mbs      = mbs;
1872   b->Mbs      = Mbs;
1873   b->nbs      = B->cmap->n / bs;
1874   b->Nbs      = B->cmap->N / bs;
1875 
1876   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1877   b->rstartbs = B->rmap->rstart / bs;
1878   b->rendbs   = B->rmap->rend / bs;
1879 
1880   b->cstartbs = B->cmap->rstart / bs;
1881   b->cendbs   = B->cmap->rend / bs;
1882 
1883 #if defined(PETSC_USE_CTABLE)
1884   PetscCall(PetscTableDestroy(&b->colmap));
1885 #else
1886   PetscCall(PetscFree(b->colmap));
1887 #endif
1888   PetscCall(PetscFree(b->garray));
1889   PetscCall(VecDestroy(&b->lvec));
1890   PetscCall(VecScatterDestroy(&b->Mvctx));
1891   PetscCall(VecDestroy(&b->slvec0));
1892   PetscCall(VecDestroy(&b->slvec0b));
1893   PetscCall(VecDestroy(&b->slvec1));
1894   PetscCall(VecDestroy(&b->slvec1a));
1895   PetscCall(VecDestroy(&b->slvec1b));
1896   PetscCall(VecScatterDestroy(&b->sMvctx));
1897 
  /* Because b->B may have been resized, simply destroy it and create a new one each time */
1899   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1900   PetscCall(MatDestroy(&b->B));
1901   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1902   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1903   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1904 
1905   if (!B->preallocated) {
1906     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1907     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1908     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1909     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1910   }
1911 
1912   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1913   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1914 
1915   B->preallocated  = PETSC_TRUE;
1916   B->was_assembled = PETSC_FALSE;
1917   B->assembled     = PETSC_FALSE;
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
1922   PetscInt        m, rstart, cend;
1923   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1924   const PetscInt *JJ          = NULL;
1925   PetscScalar    *values      = NULL;
1926   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1927   PetscBool       nooffprocentries;
1928 
1929   PetscFunctionBegin;
1930   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1931   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1932   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1933   PetscCall(PetscLayoutSetUp(B->rmap));
1934   PetscCall(PetscLayoutSetUp(B->cmap));
1935   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1936   m      = B->rmap->n / bs;
1937   rstart = B->rmap->rstart / bs;
1938   cend   = B->cmap->rend / bs;
1939 
1940   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
1941   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
1942   for (i = 0; i < m; i++) {
1943     nz = ii[i + 1] - ii[i];
1944     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the entries on the diagonal and above, split into diagonal and off-diagonal portions */
1946     JJ = jj + ii[i];
1947     bd = 0;
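    /* bd counts the strictly lower-triangular entries of this block row; they are dropped
       because only the upper triangle is stored */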
1948     for (j = 0; j < nz; j++) {
1949       if (*JJ >= i + rstart) break;
1950       JJ++;
1951       bd++;
1952     }
1953     d = 0;
1954     for (; j < nz; j++) {
1955       if (*JJ++ >= cend) break;
1956       d++;
1957     }
1958     d_nnz[i] = d;
1959     o_nnz[i] = nz - d - bd;
1960     nz       = nz - bd;
1961     nz_max   = PetscMax(nz_max, nz);
1962   }
1963   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
1964   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
1965   PetscCall(PetscFree2(d_nnz, o_nnz));
1966 
1967   values = (PetscScalar *)V;
1968   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
1969   for (i = 0; i < m; i++) {
1970     PetscInt        row   = i + rstart;
1971     PetscInt        ncols = ii[i + 1] - ii[i];
1972     const PetscInt *icols = jj + ii[i];
1973     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
1974       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
1975       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
1976     } else { /* block ordering does not match so we can only insert one block at a time. */
1977       PetscInt j;
1978       for (j = 0; j < ncols; j++) {
1979         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
1980         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
1981       }
1982     }
1983   }
1984 
1985   if (!V) PetscCall(PetscFree(values));
1986   nooffprocentries    = B->nooffprocentries;
1987   B->nooffprocentries = PETSC_TRUE;
1988   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1989   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1990   B->nooffprocentries = nooffprocentries;
1991 
1992   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 /*MC
1997    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1998    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1999    the matrix is stored.
2000 
2001    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2002    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2003 
2004    Options Database Keys:
2005 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2006 
2007    Note:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
     diagonal portion of the matrix on each process must be less than or equal to the number of columns.
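
   A typical construction sequence is sketched below (the communicator, sizes, and
   preallocation counts are illustrative); only entries in the upper triangle,
   including the diagonal, should be set:
.vb
   MatCreate(comm, &A);
   MatSetSizes(A, m, m, M, M);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
   /* insert upper-triangular entries with MatSetValues()/MatSetValuesBlocked() */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve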
2010 
2011    Level: beginner
2012 
2013 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2014 M*/
2015 
2016 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) {
2017   Mat_MPISBAIJ *b;
2018   PetscBool     flg = PETSC_FALSE;
2019 
2020   PetscFunctionBegin;
2021   PetscCall(PetscNew(&b));
2022   B->data = (void *)b;
2023   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2024 
2025   B->ops->destroy = MatDestroy_MPISBAIJ;
2026   B->ops->view    = MatView_MPISBAIJ;
2027   B->assembled    = PETSC_FALSE;
2028   B->insertmode   = NOT_SET_VALUES;
2029 
2030   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2031   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2032 
2033   /* build local table of row and column ownerships */
2034   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2035 
2036   /* build cache for off array entries formed */
2037   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2038 
2039   b->donotstash  = PETSC_FALSE;
2040   b->colmap      = NULL;
2041   b->garray      = NULL;
2042   b->roworiented = PETSC_TRUE;
2043 
2044   /* stuff used in block assembly */
2045   b->barray = NULL;
2046 
2047   /* stuff used for matrix vector multiply */
2048   b->lvec    = NULL;
2049   b->Mvctx   = NULL;
2050   b->slvec0  = NULL;
2051   b->slvec0b = NULL;
2052   b->slvec1  = NULL;
2053   b->slvec1a = NULL;
2054   b->slvec1b = NULL;
2055   b->sMvctx  = NULL;
2056 
2057   /* stuff for MatGetRow() */
2058   b->rowindices   = NULL;
2059   b->rowvalues    = NULL;
2060   b->getrowactive = PETSC_FALSE;
2061 
2062   /* hash table stuff */
2063   b->ht           = NULL;
2064   b->hd           = NULL;
2065   b->ht_size      = 0;
2066   b->ht_flag      = PETSC_FALSE;
2067   b->ht_fact      = 0;
2068   b->ht_total_ct  = 0;
2069   b->ht_insert_ct = 0;
2070 
2071   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2072   b->ijonly = PETSC_FALSE;
2073 
2074   b->in_loc = NULL;
2075   b->v_loc  = NULL;
2076   b->n_loc  = 0;
2077 
2078   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2079   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2080   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2081   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2082 #if defined(PETSC_HAVE_ELEMENTAL)
2083   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2084 #endif
2085 #if defined(PETSC_HAVE_SCALAPACK)
2086   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2087 #endif
2088   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2089   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2090 
2091   B->symmetric                   = PETSC_BOOL3_TRUE;
2092   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2093   B->symmetry_eternal            = PETSC_TRUE;
2094   B->structural_symmetry_eternal = PETSC_TRUE;
2095 #if defined(PETSC_USE_COMPLEX)
2096   B->hermitian = PETSC_BOOL3_FALSE;
2097 #else
2098   B->hermitian = PETSC_BOOL3_TRUE;
2099 #endif
2100 
2101   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for MPISBAIJ matrix", "Mat");
2103   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2104   if (flg) {
2105     PetscReal fact = 1.39;
2106     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2107     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2108     if (fact <= 1.0) fact = 1.39;
2109     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2110     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2111   }
2112   PetscOptionsEnd();
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 /*MC
2117    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2118 
2119    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2120    and `MATMPISBAIJ` otherwise.
2121 
2122    Options Database Key:
2123 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2124 
2125   Level: beginner
2126 
.seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2128 M*/
2129 
2130 /*@C
2131    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2132    the user should preallocate the matrix storage by setting the parameters
2133    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2134    performance can be increased by more than a factor of 50.
2135 
2136    Collective on B
2137 
2138    Input Parameters:
2139 +  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2142 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2143            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2148 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2149            submatrix (same for all local rows).
2150 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2151            off-diagonal portion of the local submatrix that is right of the diagonal
2152            (possibly different for each block row) or NULL.
2153 
2154    Options Database Keys:
2155 +   -mat_no_unroll - uses code that does not unroll the loops in the
2156                      block calculations (much slower)
2157 -   -mat_block_size - size of the blocks to use
2158 
2159    Notes:
2160 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2163 
2164    If the *_nnz parameter is given then the *_nz parameter is ignored
2165 
2166    Storage Information:
2167    For a square global matrix we define each processor's diagonal portion
2168    to be its local rows and the corresponding columns (a square submatrix);
2169    each processor's off-diagonal portion encompasses the remainder of the
2170    local matrix (a rectangular submatrix).
2171 
2172    The user can specify preallocated storage for the diagonal part of
2173    the local submatrix with either d_nz or d_nnz (not both).  Set
2174    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2175    memory allocation.  Likewise, specify preallocated storage for the
2176    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2177 
   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
2182 
2183    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2184    the figure below we depict these three local rows and all columns (0-11).
2185 
2186 .vb
2187            0 1 2 3 4 5 6 7 8 9 10 11
2188           --------------------------
2189    row 3  |. . . d d d o o o o  o  o
2190    row 4  |. . . d d d o o o o  o  o
2191    row 5  |. . . d d d o o o o  o  o
2192           --------------------------
2193 .ve
2194 
2195    Thus, any entries in the d locations are stored in the d (diagonal)
2196    submatrix, and any entries in the o locations are stored in the
2197    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2198    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2199 
2200    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2201    plus the diagonal part of the d matrix,
2202    and o_nz should indicate the number of block nonzeros per row in the o matrix
2203 
2204    In general, for PDE problems in which most nonzeros are near the diagonal,
2205    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2206    or you will get TERRIBLE performance; see the users' manual chapter on
2207    matrices.
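
   For instance, a minimal sketch (comm, m, bs, and the counts below are illustrative):
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE); /* m local rows and columns, divisible by bs */
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, 3, NULL, 2, NULL);   /* at most 3 diagonal and 2 off-diagonal block nonzeros per block row */
.ve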
2208 
2209    Level: intermediate
2210 
2211 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2212 @*/
2213 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
2214   PetscFunctionBegin;
2215   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2216   PetscValidType(B, 1);
2217   PetscValidLogicalCollectiveInt(B, bs, 2);
2218   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 /*@C
2223    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2224    (block compressed row).  For good matrix assembly performance
2225    the user should preallocate the matrix storage by setting the parameters
2226    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2227    performance can be increased by more than a factor of 50.
2228 
2229    Collective
2230 
2231    Input Parameters:
2232 +  comm - MPI communicator
2233 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2235 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2236            This value should be the same as the local size used in creating the
2237            y vector for the matrix-vector product y = Ax.
2238 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2239            This value should be the same as the local size used in creating the
2240            x vector for the matrix-vector product y = Ax.
2241 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2242 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2243 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2244            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
2248            If you plan to factor the matrix you must leave room for the diagonal entry and
2249            set its value even if it is zero.
2250 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2251            submatrix (same for all local rows).
2252 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2253            off-diagonal portion of the local submatrix (possibly different for
2254            each block row) or NULL.
2255 
2256    Output Parameter:
2257 .  A - the matrix
2258 
2259    Options Database Keys:
2260 +   -mat_no_unroll - uses code that does not unroll the loops in the
2261                      block calculations (much slower)
2262 .   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
2265 
2266    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2267    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2268    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2269 
2270    Notes:
   The number of rows and columns must be divisible by the blocksize.
   This matrix type does not support complex Hermitian operations.
2273 
2274    The user MUST specify either the local or global matrix dimensions
2275    (possibly both).
2276 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2279 
2280    If the *_nnz parameter is given then the *_nz parameter is ignored
2281 
2282    Storage Information:
2283    For a square global matrix we define each processor's diagonal portion
2284    to be its local rows and the corresponding columns (a square submatrix);
2285    each processor's off-diagonal portion encompasses the remainder of the
2286    local matrix (a rectangular submatrix).
2287 
2288    The user can specify preallocated storage for the diagonal part of
2289    the local submatrix with either d_nz or d_nnz (not both).  Set
2290    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2291    memory allocation.  Likewise, specify preallocated storage for the
2292    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2293 
2294    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2295    the figure below we depict these three local rows and all columns (0-11).
2296 
2297 .vb
2298            0 1 2 3 4 5 6 7 8 9 10 11
2299           --------------------------
2300    row 3  |. . . d d d o o o o  o  o
2301    row 4  |. . . d d d o o o o  o  o
2302    row 5  |. . . d d d o o o o  o  o
2303           --------------------------
2304 .ve
2305 
2306    Thus, any entries in the d locations are stored in the d (diagonal)
2307    submatrix, and any entries in the o locations are stored in the
2308    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2310 
2311    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2312    plus the diagonal part of the d matrix,
2313    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2314    In general, for PDE problems in which most nonzeros are near the diagonal,
2315    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2316    or you will get TERRIBLE performance; see the users' manual chapter on
2317    matrices.
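
   For instance, a minimal sketch (comm and the chosen sizes and counts are illustrative):
.vb
   Mat A;
   /* bs = 2, 4 local block rows and columns, at most 3 diagonal and 1 off-diagonal block nonzeros per block row */
   MatCreateSBAIJ(comm, 2, 8, 8, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A);
.ve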
2318 
2319    Level: intermediate
2320 
2321 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2322 @*/
2323 
2324 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
2325   PetscMPIInt size;
2326 
2327   PetscFunctionBegin;
2328   PetscCall(MatCreate(comm, A));
2329   PetscCall(MatSetSizes(*A, m, n, M, N));
2330   PetscCallMPI(MPI_Comm_size(comm, &size));
2331   if (size > 1) {
2332     PetscCall(MatSetType(*A, MATMPISBAIJ));
2333     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2334   } else {
2335     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2336     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2337   }
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
2342   Mat           mat;
2343   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2344   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2345   PetscScalar  *array;
2346 
2347   PetscFunctionBegin;
2348   *newmat = NULL;
2349 
2350   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2351   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2352   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2353   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2354   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2355 
2356   mat->factortype   = matin->factortype;
2357   mat->preallocated = PETSC_TRUE;
2358   mat->assembled    = PETSC_TRUE;
2359   mat->insertmode   = NOT_SET_VALUES;
2360 
2361   a      = (Mat_MPISBAIJ *)mat->data;
2362   a->bs2 = oldmat->bs2;
2363   a->mbs = oldmat->mbs;
2364   a->nbs = oldmat->nbs;
2365   a->Mbs = oldmat->Mbs;
2366   a->Nbs = oldmat->Nbs;
2367 
2368   a->size         = oldmat->size;
2369   a->rank         = oldmat->rank;
2370   a->donotstash   = oldmat->donotstash;
2371   a->roworiented  = oldmat->roworiented;
2372   a->rowindices   = NULL;
2373   a->rowvalues    = NULL;
2374   a->getrowactive = PETSC_FALSE;
2375   a->barray       = NULL;
2376   a->rstartbs     = oldmat->rstartbs;
2377   a->rendbs       = oldmat->rendbs;
2378   a->cstartbs     = oldmat->cstartbs;
2379   a->cendbs       = oldmat->cendbs;
2380 
2381   /* hash table stuff */
2382   a->ht           = NULL;
2383   a->hd           = NULL;
2384   a->ht_size      = 0;
2385   a->ht_flag      = oldmat->ht_flag;
2386   a->ht_fact      = oldmat->ht_fact;
2387   a->ht_total_ct  = 0;
2388   a->ht_insert_ct = 0;
2389 
2390   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2391   if (oldmat->colmap) {
2392 #if defined(PETSC_USE_CTABLE)
2393     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2394 #else
2395     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2396     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2397 #endif
2398   } else a->colmap = NULL;
2399 
2400   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2401     PetscCall(PetscMalloc1(len, &a->garray));
2402     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2403   } else a->garray = NULL;
2404 
2405   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2406   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2407   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2408 
2409   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2410   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
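  /* slvec1a/slvec1b and slvec0b are views into the arrays of slvec1 and slvec0 (the first bs*mbs
     entries are the local part, the rest the ghost part), so rebuild them on the duplicated arrays */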
2411 
2412   PetscCall(VecGetLocalSize(a->slvec1, &nt));
2413   PetscCall(VecGetArray(a->slvec1, &array));
2414   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2415   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2416   PetscCall(VecRestoreArray(a->slvec1, &array));
2417   PetscCall(VecGetArray(a->slvec0, &array));
2418   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2419   PetscCall(VecRestoreArray(a->slvec0, &array));
2420 
  /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead, lazily share the old scatter by taking a reference: */
2422   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2423   a->sMvctx = oldmat->sMvctx;
2424 
2425   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2426   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2427   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2428   *newmat = mat;
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2433 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2434 
2435 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) {
2436   PetscBool isbinary;
2437 
2438   PetscFunctionBegin;
2439   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2440   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2441   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) {
2446   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2447   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2448   PetscReal     atmp;
2449   PetscReal    *work, *svalues, *rvalues;
2450   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2451   PetscMPIInt   rank, size;
2452   PetscInt     *rowners_bs, dest, count, source;
2453   PetscScalar  *va;
2454   MatScalar    *ba;
2455   MPI_Status    stat;
2456 
2457   PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning the column indices (idx) is not supported for MPISBAIJ; send email to petsc-maint@mcs.anl.gov");
2459   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2460   PetscCall(VecGetArray(v, &va));
2461 
2462   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2463   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2464 
2465   bs  = A->rmap->bs;
2466   mbs = a->mbs;
2467   Mbs = a->Mbs;
2468   ba  = b->a;
2469   bi  = b->i;
2470   bj  = b->j;
2471 
2472   /* find ownerships */
2473   rowners_bs = A->rmap->range;
2474 
2475   /* each proc creates an array to be distributed */
2476   PetscCall(PetscCalloc1(bs * Mbs, &work));
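  /* work[col] accumulates the max abs value this rank's off-diagonal block contributes to global
     column col; by symmetry these belong to rows owned by later ranks and are sent to them below */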
2477 
2478   /* row_max for B */
2479   if (rank != size - 1) {
2480     for (i = 0; i < mbs; i++) {
2481       ncols = bi[1] - bi[0];
2482       bi++;
2483       brow = bs * i;
2484       for (j = 0; j < ncols; j++) {
2485         bcol = bs * (*bj);
2486         for (kcol = 0; kcol < bs; kcol++) {
2487           col = bcol + kcol;           /* local col index */
2488           col += rowners_bs[rank + 1]; /* global col index */
2489           for (krow = 0; krow < bs; krow++) {
2490             atmp = PetscAbsScalar(*ba);
2491             ba++;
2492             row = brow + krow; /* local row index */
2493             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2494             if (work[col] < atmp) work[col] = atmp;
2495           }
2496         }
2497         bj++;
2498       }
2499     }
2500 
2501     /* send values to its owners */
2502     for (dest = rank + 1; dest < size; dest++) {
2503       svalues = work + rowners_bs[dest];
2504       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2505       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2506     }
2507   }
2508 
2509   /* receive values */
2510   if (rank) {
2511     rvalues = work;
2512     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2513     for (source = 0; source < rank; source++) {
2514       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2515       /* process values */
2516       for (i = 0; i < count; i++) {
2517         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2518       }
2519     }
2520   }
2521 
2522   PetscCall(VecRestoreArray(v, &va));
2523   PetscCall(PetscFree(work));
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
2528   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2529   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2530   PetscScalar       *x, *ptr, *from;
2531   Vec                bb1;
2532   const PetscScalar *b;
2533 
2534   PetscFunctionBegin;
2535   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2536   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2537 
2538   if (flag == SOR_APPLY_UPPER) {
2539     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2540     PetscFunctionReturn(0);
2541   }
2542 
2543   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2544     if (flag & SOR_ZERO_INITIAL_GUESS) {
2545       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2546       its--;
2547     }
2548 
2549     PetscCall(VecDuplicate(bb, &bb1));
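    /* each sweep packs slvec0 = [-x; -B^T x]; the scatter adds the remote -B^T x pieces into the copy
       of bb held in slvec1a and gathers -x ghost values into slvec1b, so the multadd below yields
       bb1 = bb - (off-diagonal part of A)*x */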
2550     while (its--) {
2551       /* lower triangular part: slvec0b = B^T*xx (negated below by the VecScale) */
2552       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2553 
2554       /* copy xx into slvec0a */
2555       PetscCall(VecGetArray(mat->slvec0, &ptr));
2556       PetscCall(VecGetArray(xx, &x));
2557       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2558       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2559 
2560       PetscCall(VecScale(mat->slvec0, -1.0));
2561 
2562       /* copy bb into slvec1a */
2563       PetscCall(VecGetArray(mat->slvec1, &ptr));
2564       PetscCall(VecGetArrayRead(bb, &b));
2565       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2566       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2567 
2568       /* set slvec1b = 0 */
2569       PetscCall(VecSet(mat->slvec1b, 0.0));
2570 
2571       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2572       PetscCall(VecRestoreArray(xx, &x));
2573       PetscCall(VecRestoreArrayRead(bb, &b));
2574       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2575 
2576       /* off-diagonal contributions: bb1 = slvec1a + B*slvec1b (= bb - offdiag(A)*xx) */
2577       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2578 
2579       /* local diagonal sweep */
2580       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2581     }
2582     PetscCall(VecDestroy(&bb1));
2583   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2584     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2585   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2586     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2587   } else if (flag & SOR_EISENSTAT) {
2588     Vec                xx1;
2589     PetscBool          hasop;
2590     const PetscScalar *diag;
2591     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2592     PetscInt           i, n;
2593 
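    /* Eisenstat's trick: fold the matrix application into the SSOR sweeps so that each
       preconditioned product costs roughly one backward and one forward local sweep; the
       factor scale = (omega-2)/omega combines the diagonal terms of the two sweeps. */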
2594     if (!mat->xx1) {
2595       PetscCall(VecDuplicate(bb, &mat->xx1));
2596       PetscCall(VecDuplicate(bb, &mat->bb1));
2597     }
2598     xx1 = mat->xx1;
2599     bb1 = mat->bb1;
2600 
2601     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2602 
2603     if (!mat->diag) {
2604       /* note: this cached diagonal becomes stale if the same matrix is given new numerical values */
2605       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2606       PetscCall(MatGetDiagonal(matin, mat->diag));
2607     }
2608     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2609 
2610     if (hasop) {
2611       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2612       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2613     } else {
2614       /*
2615           The following two calls are replaced below by hand-written loops that may be a bit faster with a good compiler:
2616       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2617       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2618       */
2619       PetscCall(VecGetArray(mat->slvec1a, &sl));
2620       PetscCall(VecGetArrayRead(mat->diag, &diag));
2621       PetscCall(VecGetArrayRead(bb, &b));
2622       PetscCall(VecGetArray(xx, &x));
2623       PetscCall(VecGetLocalSize(xx, &n));
2624       if (omega == 1.0) {
2625         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2626         PetscCall(PetscLogFlops(2.0 * n));
2627       } else {
2628         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2629         PetscCall(PetscLogFlops(3.0 * n));
2630       }
2631       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2632       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2633       PetscCall(VecRestoreArrayRead(bb, &b));
2634       PetscCall(VecRestoreArray(xx, &x));
2635     }
2636 
2637     /* multiply off-diagonal portion of matrix */
2638     PetscCall(VecSet(mat->slvec1b, 0.0));
2639     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2640     PetscCall(VecGetArray(mat->slvec0, &from));
2641     PetscCall(VecGetArray(xx, &x));
2642     PetscCall(PetscArraycpy(from, x, bs * mbs));
2643     PetscCall(VecRestoreArray(mat->slvec0, &from));
2644     PetscCall(VecRestoreArray(xx, &x));
2645     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2646     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2647     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2648 
2649     /* local sweep */
2650     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2651     PetscCall(VecAXPY(xx, 1.0, xx1));
2652   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2653   PetscFunctionReturn(0);
2654 }
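
/* Usage sketch (illustrative; ksp, pc, A, b, x are hypothetical names): the routine above is
   normally reached through PCSOR rather than called directly, e.g.

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSOR));
     PetscCall(PCSORSetSymmetric(pc, SOR_LOCAL_SYMMETRIC_SWEEP)); // exercises the first branch above
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/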
2655 
2656 /*@
2657      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
2658          in standard CSR format.
2659 
2660    Collective
2661 
2662    Input Parameters:
2663 +  comm - MPI communicator
2664 .  bs - the block size; only a block size of 1 is supported
2665 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2666 .  n - This value should be the same as the local size used in creating the
2667        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2668        calculated if N is given). For square matrices n is almost always m.
2669 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
2670 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
2671 .   i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in that block row of the matrix
2672 .   j - column indices
2673 -   a - matrix values
2674 
2675    Output Parameter:
2676 .   mat - the matrix
2677 
2678    Level: intermediate
2679 
2680    Notes:
2681        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2682      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2683      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2684 
2685        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
2686 
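   Example:
   A minimal sketch, assuming bs = 1, one local row per process, and a global tridiagonal matrix;
   only the upper triangle is passed, the values are illustrative, and the last process (whose row
   has no superdiagonal entry) would instead pass i[] = {0, 1} with only its diagonal entry
.vb
   PetscInt    i[2] = {0, 2};           // this process's single row has 2 entries
   PetscInt    j[2] = {rank, rank + 1}; // global columns: diagonal and first superdiagonal
   PetscScalar a[2] = {2.0, -1.0};
   Mat         A;

   PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve
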
2687 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2688           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2689 @*/
2690 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
2691   PetscFunctionBegin;
2692   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2693   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
2694   PetscCall(MatCreate(comm, mat));
2695   PetscCall(MatSetSizes(*mat, m, n, M, N));
2696   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2697   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 /*@C
2702    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2703 
2704    Collective
2705 
2706    Input Parameters:
2707 +  B - the matrix
2708 .  bs - the block size
2709 .  i - the indices into j for the start of each local row (starts with zero)
2710 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2711 -  v - optional values in the matrix
2712 
2713    Level: advanced
2714 
2715    Notes:
2716    Though this routine has Preallocation() in its name, it also sets the exact nonzero locations of the matrix entries,
2717    and usually the numerical values as well
2718 
2719    Any entries below the diagonal are ignored
2720 
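   Example:
   A typical call sequence, sketched with hypothetical m, bs, i, j, and v as described above
.vb
   Mat B;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocationCSR(B, bs, i, j, v));
.ve
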
2721 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2722 @*/
2723 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
2724   PetscFunctionBegin;
2725   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
2730   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2731   PetscInt    *indx;
2732   PetscScalar *values;
2733 
2734   PetscFunctionBegin;
2735   PetscCall(MatGetSize(inmat, &m, &N));
2736   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2737     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2738     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2739     PetscInt     *bindx, rmax = a->rmax, j;
2740     PetscMPIInt   rank, size;
2741 
2742     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2743     mbs = m / bs;
2744     Nbs = N / cbs;
2745     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2746     nbs = n / cbs;
2747 
2748     PetscCall(PetscMalloc1(rmax, &bindx));
2749     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* a macro; its outputs __end and __rstart are used below */
2750 
2751     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2752     PetscCallMPI(MPI_Comm_size(comm, &size));
2753     if (rank == size - 1) {
2754       /* Check sum(nbs) = Nbs */
2755       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2756     }
2757 
2758     rstart = __rstart; /* block rstart of *outmat; see the macro MatPreallocateBegin */
2759     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2760     for (i = 0; i < mbs; i++) {
2761       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2762       nnz = nnz / bs;
2763       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2764       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2765       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2766     }
2767     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2768     PetscCall(PetscFree(bindx));
2769 
2770     PetscCall(MatCreate(comm, outmat));
2771     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2772     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2773     PetscCall(MatSetType(*outmat, MATSBAIJ));
2774     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2775     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2776     MatPreallocateEnd(dnz, onz);
2777   }
2778 
2779   /* numeric phase */
2780   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2781   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2782 
2783   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2784   for (i = 0; i < m; i++) {
2785     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2786     Ii = i + rstart;
2787     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2788     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2789   }
2790   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2791   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2792   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2793   PetscFunctionReturn(0);
2794 }
2795
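/* Usage sketch (illustrative; seq and par are hypothetical names): the routine above backs
   MatCreateMPIMatConcatenateSeqMat() for SBAIJ, stacking one sequential SBAIJ matrix per process
   into a single parallel matrix.

     Mat seq; // this process's MATSEQSBAIJ piece; all pieces share the same global column size
     Mat par;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &par));
*/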