xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4ad8454beace47809662cdae21ee081016eaa39a)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127 {
128   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
131 
132   PetscFunctionBegin;
133 #if defined(PETSC_USE_CTABLE)
134   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136 #else
137   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139 #endif
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
144   do { \
145     brow = row / bs; \
146     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
147     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
148     rmax = aimax[brow]; \
149     nrow = ailen[brow]; \
150     bcol = col / bs; \
151     ridx = row % bs; \
152     cidx = col % bs; \
153     low  = 0; \
154     high = nrow; \
155     while (high - low > 3) { \
156       t = (low + high) / 2; \
157       if (rp[t] > bcol) high = t; \
158       else low = t; \
159     } \
160     for (_i = low; _i < high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap + bs2 * _i + bs * cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value; \
165         else *bap = value; \
166         goto a_noinsert; \
167       } \
168     } \
169     if (a->nonew == 1) goto a_noinsert; \
170     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
171     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
172     N = nrow++ - 1; \
173     /* shift up all the later entries in this row */ \
174     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
175     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
176     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
177     rp[_i]                          = bcol; \
178     ap[bs2 * _i + bs * cidx + ridx] = value; \
179   a_noinsert:; \
180     ailen[brow] = nrow; \
181   } while (0)
182 
183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
184   do { \
185     brow = row / bs; \
186     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
187     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
188     rmax = bimax[brow]; \
189     nrow = bilen[brow]; \
190     bcol = col / bs; \
191     ridx = row % bs; \
192     cidx = col % bs; \
193     low  = 0; \
194     high = nrow; \
195     while (high - low > 3) { \
196       t = (low + high) / 2; \
197       if (rp[t] > bcol) high = t; \
198       else low = t; \
199     } \
200     for (_i = low; _i < high; _i++) { \
201       if (rp[_i] > bcol) break; \
202       if (rp[_i] == bcol) { \
203         bap = ap + bs2 * _i + bs * cidx + ridx; \
204         if (addv == ADD_VALUES) *bap += value; \
205         else *bap = value; \
206         goto b_noinsert; \
207       } \
208     } \
209     if (b->nonew == 1) goto b_noinsert; \
210     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
211     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
212     N = nrow++ - 1; \
213     /* shift up all the later entries in this row */ \
214     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
215     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
216     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
217     rp[_i]                          = bcol; \
218     ap[bs2 * _i + bs * cidx + ridx] = value; \
219   b_noinsert:; \
220     bilen[brow] = nrow; \
221   } while (0)
222 
223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
224 {
225   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
226   MatScalar    value;
227   PetscBool    roworiented = baij->roworiented;
228   PetscInt     i, j, row, col;
229   PetscInt     rstart_orig = mat->rmap->rstart;
230   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
231   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
232 
233   /* Some Variables required in the macro */
234   Mat          A     = baij->A;
235   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
236   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
237   MatScalar   *aa = a->a;
238 
239   Mat          B     = baij->B;
240   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
241   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
242   MatScalar   *ba = b->a;
243 
244   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
245   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
246   MatScalar *ap, *bap;
247 
248   PetscFunctionBegin;
249   for (i = 0; i < m; i++) {
250     if (im[i] < 0) continue;
251     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
252     if (im[i] >= rstart_orig && im[i] < rend_orig) {
253       row = im[i] - rstart_orig;
254       for (j = 0; j < n; j++) {
255         if (in[j] >= cstart_orig && in[j] < cend_orig) {
256           col = in[j] - cstart_orig;
257           if (roworiented) value = v[i * n + j];
258           else value = v[i + j * m];
259           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
260         } else if (in[j] < 0) {
261           continue;
262         } else {
263           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
264           if (mat->was_assembled) {
265             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
266 #if defined(PETSC_USE_CTABLE)
267             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
268             col = col - 1;
269 #else
270             col = baij->colmap[in[j] / bs] - 1;
271 #endif
272             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
273               PetscCall(MatDisAssemble_MPIBAIJ(mat));
274               col = in[j];
275               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
276               B     = baij->B;
277               b     = (Mat_SeqBAIJ *)(B)->data;
278               bimax = b->imax;
279               bi    = b->i;
280               bilen = b->ilen;
281               bj    = b->j;
282               ba    = b->a;
283             } else {
284               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
285               col += in[j] % bs;
286             }
287           } else col = in[j];
288           if (roworiented) value = v[i * n + j];
289           else value = v[i + j * m];
290           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
291           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
292         }
293       }
294     } else {
295       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
296       if (!baij->donotstash) {
297         mat->assembled = PETSC_FALSE;
298         if (roworiented) {
299           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
300         } else {
301           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
302         }
303       }
304     }
305   }
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
310 {
311   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
312   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
313   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
314   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
315   PetscBool          roworiented = a->roworiented;
316   const PetscScalar *value       = v;
317   MatScalar         *ap, *aa = a->a, *bap;
318 
319   PetscFunctionBegin;
320   rp    = aj + ai[row];
321   ap    = aa + bs2 * ai[row];
322   rmax  = imax[row];
323   nrow  = ailen[row];
324   value = v;
325   low   = 0;
326   high  = nrow;
327   while (high - low > 7) {
328     t = (low + high) / 2;
329     if (rp[t] > col) high = t;
330     else low = t;
331   }
332   for (i = low; i < high; i++) {
333     if (rp[i] > col) break;
334     if (rp[i] == col) {
335       bap = ap + bs2 * i;
336       if (roworiented) {
337         if (is == ADD_VALUES) {
338           for (ii = 0; ii < bs; ii++) {
339             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
340           }
341         } else {
342           for (ii = 0; ii < bs; ii++) {
343             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
344           }
345         }
346       } else {
347         if (is == ADD_VALUES) {
348           for (ii = 0; ii < bs; ii++, value += bs) {
349             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
350             bap += bs;
351           }
352         } else {
353           for (ii = 0; ii < bs; ii++, value += bs) {
354             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
355             bap += bs;
356           }
357         }
358       }
359       goto noinsert2;
360     }
361   }
362   if (nonew == 1) goto noinsert2;
363   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
364   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
365   N = nrow++ - 1;
366   high++;
367   /* shift up all the later entries in this row */
368   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
369   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
370   rp[i] = col;
371   bap   = ap + bs2 * i;
372   if (roworiented) {
373     for (ii = 0; ii < bs; ii++) {
374       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
375     }
376   } else {
377     for (ii = 0; ii < bs; ii++) {
378       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
379     }
380   }
381 noinsert2:;
382   ailen[row] = nrow;
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 /*
387     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389 */
390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
391 {
392   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
393   const PetscScalar *value;
394   MatScalar         *barray      = baij->barray;
395   PetscBool          roworiented = baij->roworiented;
396   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
397   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
398   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
399 
400   PetscFunctionBegin;
401   if (!barray) {
402     PetscCall(PetscMalloc1(bs2, &barray));
403     baij->barray = barray;
404   }
405 
406   if (roworiented) stepval = (n - 1) * bs;
407   else stepval = (m - 1) * bs;
408 
409   for (i = 0; i < m; i++) {
410     if (im[i] < 0) continue;
411     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
412     if (im[i] >= rstart && im[i] < rend) {
413       row = im[i] - rstart;
414       for (j = 0; j < n; j++) {
415         /* If NumCol = 1 then a copy is not required */
416         if ((roworiented) && (n == 1)) {
417           barray = (MatScalar *)v + i * bs2;
418         } else if ((!roworiented) && (m == 1)) {
419           barray = (MatScalar *)v + j * bs2;
420         } else { /* Here a copy is required */
421           if (roworiented) {
422             value = v + (i * (stepval + bs) + j) * bs;
423           } else {
424             value = v + (j * (stepval + bs) + i) * bs;
425           }
426           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
427             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
428             barray += bs;
429           }
430           barray -= bs2;
431         }
432 
433         if (in[j] >= cstart && in[j] < cend) {
434           col = in[j] - cstart;
435           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
436         } else if (in[j] < 0) {
437           continue;
438         } else {
439           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
440           if (mat->was_assembled) {
441             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
442 
443 #if defined(PETSC_USE_DEBUG)
444   #if defined(PETSC_USE_CTABLE)
445             {
446               PetscInt data;
447               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
448               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
449             }
450   #else
451             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
452   #endif
453 #endif
454 #if defined(PETSC_USE_CTABLE)
455             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
456             col = (col - 1) / bs;
457 #else
458             col = (baij->colmap[in[j]] - 1) / bs;
459 #endif
460             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
461               PetscCall(MatDisAssemble_MPIBAIJ(mat));
462               col = in[j];
463             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
464           } else col = in[j];
465           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
466         }
467       }
468     } else {
469       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
470       if (!baij->donotstash) {
471         if (roworiented) {
472           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
473         } else {
474           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(PETSC_SUCCESS);
480 }
481 
482 #define HASH_KEY             0.6180339887
483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
487 {
488   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
489   PetscBool    roworiented = baij->roworiented;
490   PetscInt     i, j, row, col;
491   PetscInt     rstart_orig = mat->rmap->rstart;
492   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
493   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
494   PetscReal    tmp;
495   MatScalar  **HD       = baij->hd, value;
496   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
497 
498   PetscFunctionBegin;
499   for (i = 0; i < m; i++) {
500     if (PetscDefined(USE_DEBUG)) {
501       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
502       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
503     }
504     row = im[i];
505     if (row >= rstart_orig && row < rend_orig) {
506       for (j = 0; j < n; j++) {
507         col = in[j];
508         if (roworiented) value = v[i * n + j];
509         else value = v[i + j * m];
510         /* Look up PetscInto the Hash Table */
511         key = (row / bs) * Nbs + (col / bs) + 1;
512         h1  = HASH(size, key, tmp);
513 
514         idx = h1;
515         if (PetscDefined(USE_DEBUG)) {
516           insert_ct++;
517           total_ct++;
518           if (HT[idx] != key) {
519             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
520               ;
521             if (idx == size) {
522               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
523                 ;
524               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
525             }
526           }
527         } else if (HT[idx] != key) {
528           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
529             ;
530           if (idx == size) {
531             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
532               ;
533             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
534           }
535         }
536         /* A HASH table entry is found, so insert the values at the correct address */
537         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
538         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
539       }
540     } else if (!baij->donotstash) {
541       if (roworiented) {
542         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
543       } else {
544         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
545       }
546     }
547   }
548   if (PetscDefined(USE_DEBUG)) {
549     baij->ht_total_ct += total_ct;
550     baij->ht_insert_ct += insert_ct;
551   }
552   PetscFunctionReturn(PETSC_SUCCESS);
553 }
554 
555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
556 {
557   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
558   PetscBool          roworiented = baij->roworiented;
559   PetscInt           i, j, ii, jj, row, col;
560   PetscInt           rstart = baij->rstartbs;
561   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
562   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
563   PetscReal          tmp;
564   MatScalar        **HD = baij->hd, *baij_a;
565   const PetscScalar *v_t, *value;
566   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
567 
568   PetscFunctionBegin;
569   if (roworiented) stepval = (n - 1) * bs;
570   else stepval = (m - 1) * bs;
571 
572   for (i = 0; i < m; i++) {
573     if (PetscDefined(USE_DEBUG)) {
574       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
575       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
576     }
577     row = im[i];
578     v_t = v + i * nbs2;
579     if (row >= rstart && row < rend) {
580       for (j = 0; j < n; j++) {
581         col = in[j];
582 
583         /* Look up into the Hash Table */
584         key = row * Nbs + col + 1;
585         h1  = HASH(size, key, tmp);
586 
587         idx = h1;
588         if (PetscDefined(USE_DEBUG)) {
589           total_ct++;
590           insert_ct++;
591           if (HT[idx] != key) {
592             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
593               ;
594             if (idx == size) {
595               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
596                 ;
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
602             ;
603           if (idx == size) {
604             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
605               ;
606             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
607           }
608         }
609         baij_a = HD[idx];
610         if (roworiented) {
611           /*value = v + i*(stepval+bs)*bs + j*bs;*/
612           /* value = v + (i*(stepval+bs)+j)*bs; */
613           value = v_t;
614           v_t += bs;
615           if (addv == ADD_VALUES) {
616             for (ii = 0; ii < bs; ii++, value += stepval) {
617               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
618             }
619           } else {
620             for (ii = 0; ii < bs; ii++, value += stepval) {
621               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
622             }
623           }
624         } else {
625           value = v + j * (stepval + bs) * bs + i * bs;
626           if (addv == ADD_VALUES) {
627             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
628               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
629             }
630           } else {
631             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
632               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
633             }
634           }
635         }
636       }
637     } else {
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   if (PetscDefined(USE_DEBUG)) {
648     baij->ht_total_ct += total_ct;
649     baij->ht_insert_ct += insert_ct;
650   }
651   PetscFunctionReturn(PETSC_SUCCESS);
652 }
653 
654 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
655 {
656   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
657   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
658   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
659 
660   PetscFunctionBegin;
661   for (i = 0; i < m; i++) {
662     if (idxm[i] < 0) continue; /* negative row */
663     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
664     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
665       row = idxm[i] - bsrstart;
666       for (j = 0; j < n; j++) {
667         if (idxn[j] < 0) continue; /* negative column */
668         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
669         if (idxn[j] >= bscstart && idxn[j] < bscend) {
670           col = idxn[j] - bscstart;
671           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
672         } else {
673           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
674 #if defined(PETSC_USE_CTABLE)
675           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
676           data--;
677 #else
678           data = baij->colmap[idxn[j] / bs] - 1;
679 #endif
680           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
681           else {
682             col = data + idxn[j] % bs;
683             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
684           }
685         }
686       }
687     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
688   }
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
692 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
693 {
694   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
695   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
696   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
697   PetscReal    sum = 0.0;
698   MatScalar   *v;
699 
700   PetscFunctionBegin;
701   if (baij->size == 1) {
702     PetscCall(MatNorm(baij->A, type, nrm));
703   } else {
704     if (type == NORM_FROBENIUS) {
705       v  = amat->a;
706       nz = amat->nz * bs2;
707       for (i = 0; i < nz; i++) {
708         sum += PetscRealPart(PetscConj(*v) * (*v));
709         v++;
710       }
711       v  = bmat->a;
712       nz = bmat->nz * bs2;
713       for (i = 0; i < nz; i++) {
714         sum += PetscRealPart(PetscConj(*v) * (*v));
715         v++;
716       }
717       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
718       *nrm = PetscSqrtReal(*nrm);
719     } else if (type == NORM_1) { /* max column sum */
720       PetscReal *tmp, *tmp2;
721       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
722       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           col = bs * (cstart + *jj) + j; /* column index */
729           for (row = 0; row < bs; row++) {
730             tmp[col] += PetscAbsScalar(*v);
731             v++;
732           }
733         }
734         jj++;
735       }
736       v  = bmat->a;
737       jj = bmat->j;
738       for (i = 0; i < bmat->nz; i++) {
739         for (j = 0; j < bs; j++) {
740           col = bs * garray[*jj] + j;
741           for (row = 0; row < bs; row++) {
742             tmp[col] += PetscAbsScalar(*v);
743             v++;
744           }
745         }
746         jj++;
747       }
748       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
749       *nrm = 0.0;
750       for (j = 0; j < mat->cmap->N; j++) {
751         if (tmp2[j] > *nrm) *nrm = tmp2[j];
752       }
753       PetscCall(PetscFree(tmp));
754       PetscCall(PetscFree(tmp2));
755     } else if (type == NORM_INFINITY) { /* max row sum */
756       PetscReal *sums;
757       PetscCall(PetscMalloc1(bs, &sums));
758       sum = 0.0;
759       for (j = 0; j < amat->mbs; j++) {
760         for (row = 0; row < bs; row++) sums[row] = 0.0;
761         v  = amat->a + bs2 * amat->i[j];
762         nz = amat->i[j + 1] - amat->i[j];
763         for (i = 0; i < nz; i++) {
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               sums[row] += PetscAbsScalar(*v);
767               v++;
768             }
769           }
770         }
771         v  = bmat->a + bs2 * bmat->i[j];
772         nz = bmat->i[j + 1] - bmat->i[j];
773         for (i = 0; i < nz; i++) {
774           for (col = 0; col < bs; col++) {
775             for (row = 0; row < bs; row++) {
776               sums[row] += PetscAbsScalar(*v);
777               v++;
778             }
779           }
780         }
781         for (row = 0; row < bs; row++) {
782           if (sums[row] > sum) sum = sums[row];
783         }
784       }
785       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786       PetscCall(PetscFree(sums));
787     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788   }
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entried need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901 {
902   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
904   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
905   PetscInt    *row, *col;
906   PetscBool    r1, r2, r3, other_disassembled;
907   MatScalar   *val;
908   PetscMPIInt  n;
909 
910   PetscFunctionBegin;
911   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912   if (!baij->donotstash && !mat->nooffprocentries) {
913     while (1) {
914       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915       if (!flg) break;
916 
917       for (i = 0; i < n;) {
918         /* Now identify the consecutive vals belonging to the same row */
919         for (j = i, rstart = row[j]; j < n; j++) {
920           if (row[j] != rstart) break;
921         }
922         if (j < n) ncols = j - i;
923         else ncols = n - i;
924         /* Now assemble all these values with a single function call */
925         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926         i = j;
927       }
928     }
929     PetscCall(MatStashScatterEnd_Private(&mat->stash));
930     /* Now process the block-stash. Since the values are stashed column-oriented,
931        set the row-oriented flag to column-oriented, and after MatSetValues()
932        restore the original flags */
933     r1 = baij->roworiented;
934     r2 = a->roworiented;
935     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
936 
937     baij->roworiented                           = PETSC_FALSE;
938     a->roworiented                              = PETSC_FALSE;
939     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
940     while (1) {
941       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
942       if (!flg) break;
943 
944       for (i = 0; i < n;) {
945         /* Now identify the consecutive vals belonging to the same row */
946         for (j = i, rstart = row[j]; j < n; j++) {
947           if (row[j] != rstart) break;
948         }
949         if (j < n) ncols = j - i;
950         else ncols = n - i;
951         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
952         i = j;
953       }
954     }
955     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
956 
957     baij->roworiented                           = r1;
958     a->roworiented                              = r2;
959     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
960   }
961 
962   PetscCall(MatAssemblyBegin(baij->A, mode));
963   PetscCall(MatAssemblyEnd(baij->A, mode));
964 
965   /* determine if any processor has disassembled, if so we must
966      also disassemble ourselves, in order that we may reassemble. */
967   /*
968      if nonzero structure of submatrix B cannot change then we know that
969      no processor disassembled thus we can skip this stuff
970   */
971   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
972     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
973     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
974   }
975 
976   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
977   PetscCall(MatAssemblyBegin(baij->B, mode));
978   PetscCall(MatAssemblyEnd(baij->B, mode));
979 
980 #if defined(PETSC_USE_INFO)
981   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
982     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
983 
984     baij->ht_total_ct  = 0;
985     baij->ht_insert_ct = 0;
986   }
987 #endif
988   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
989     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
990 
991     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
992     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
993   }
994 
995   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
996 
997   baij->rowvalues = NULL;
998 
999   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1000   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
1001     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1002     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1003   }
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1008 #include <petscdraw.h>
1009 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1010 {
1011   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1012   PetscMPIInt       rank = baij->rank;
1013   PetscInt          bs   = mat->rmap->bs;
1014   PetscBool         iascii, isdraw;
1015   PetscViewer       sviewer;
1016   PetscViewerFormat format;
1017 
1018   PetscFunctionBegin;
1019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1021   if (iascii) {
1022     PetscCall(PetscViewerGetFormat(viewer, &format));
1023     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1024       MatInfo info;
1025       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1026       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1027       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1028       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1029                                                    mat->rmap->bs, (double)info.memory));
1030       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1031       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1032       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1033       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1034       PetscCall(PetscViewerFlush(viewer));
1035       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1036       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1037       PetscCall(VecScatterView(baij->Mvctx, viewer));
1038       PetscFunctionReturn(PETSC_SUCCESS);
1039     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1040       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1041       PetscFunctionReturn(PETSC_SUCCESS);
1042     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1043       PetscFunctionReturn(PETSC_SUCCESS);
1044     }
1045   }
1046 
1047   if (isdraw) {
1048     PetscDraw draw;
1049     PetscBool isnull;
1050     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1051     PetscCall(PetscDrawIsNull(draw, &isnull));
1052     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1053   }
1054 
1055   {
1056     /* assemble the entire matrix onto first processor. */
1057     Mat          A;
1058     Mat_SeqBAIJ *Aloc;
1059     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1060     MatScalar   *a;
1061     const char  *matname;
1062 
1063     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1064     /* Perhaps this should be the type of mat? */
1065     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1066     if (rank == 0) {
1067       PetscCall(MatSetSizes(A, M, N, M, N));
1068     } else {
1069       PetscCall(MatSetSizes(A, 0, 0, M, N));
1070     }
1071     PetscCall(MatSetType(A, MATMPIBAIJ));
1072     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1073     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1074 
1075     /* copy over the A part */
1076     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1077     ai   = Aloc->i;
1078     aj   = Aloc->j;
1079     a    = Aloc->a;
1080     PetscCall(PetscMalloc1(bs, &rvals));
1081 
1082     for (i = 0; i < mbs; i++) {
1083       rvals[0] = bs * (baij->rstartbs + i);
1084       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1085       for (j = ai[i]; j < ai[i + 1]; j++) {
1086         col = (baij->cstartbs + aj[j]) * bs;
1087         for (k = 0; k < bs; k++) {
1088           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1089           col++;
1090           a += bs;
1091         }
1092       }
1093     }
1094     /* copy over the B part */
1095     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1096     ai   = Aloc->i;
1097     aj   = Aloc->j;
1098     a    = Aloc->a;
1099     for (i = 0; i < mbs; i++) {
1100       rvals[0] = bs * (baij->rstartbs + i);
1101       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1102       for (j = ai[i]; j < ai[i + 1]; j++) {
1103         col = baij->garray[aj[j]] * bs;
1104         for (k = 0; k < bs; k++) {
1105           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1106           col++;
1107           a += bs;
1108         }
1109       }
1110     }
1111     PetscCall(PetscFree(rvals));
1112     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1113     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1114     /*
1115        Everyone has to call to draw the matrix since the graphics waits are
1116        synchronized across all processors that share the PetscDraw object
1117     */
1118     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1119     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1120     if (rank == 0) {
1121       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1122       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1123     }
1124     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1125     PetscCall(MatDestroy(&A));
1126   }
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1131 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1132 {
1133   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1134   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1135   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1136   const PetscInt *garray = aij->garray;
1137   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1138   PetscInt64      nz, hnz;
1139   PetscInt       *rowlens, *colidxs;
1140   PetscScalar    *matvals;
1141   PetscMPIInt     rank;
1142 
1143   PetscFunctionBegin;
1144   PetscCall(PetscViewerSetUp(viewer));
1145 
1146   M  = mat->rmap->N;
1147   N  = mat->cmap->N;
1148   m  = mat->rmap->n;
1149   rs = mat->rmap->rstart;
1150   cs = mat->cmap->rstart;
1151   bs = mat->rmap->bs;
1152   nz = bs * bs * (A->nz + B->nz);
1153 
1154   /* write matrix header */
1155   header[0] = MAT_FILE_CLASSID;
1156   header[1] = M;
1157   header[2] = N;
1158   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1159   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1160   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1161   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1162 
1163   /* fill in and store row lengths */
1164   PetscCall(PetscMalloc1(m, &rowlens));
1165   for (cnt = 0, i = 0; i < A->mbs; i++)
1166     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1167   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1168   PetscCall(PetscFree(rowlens));
1169 
1170   /* fill in and store column indices */
1171   PetscCall(PetscMalloc1(nz, &colidxs));
1172   for (cnt = 0, i = 0; i < A->mbs; i++) {
1173     for (k = 0; k < bs; k++) {
1174       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1175         if (garray[B->j[jb]] > cs / bs) break;
1176         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1177       }
1178       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1179         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1180       for (; jb < B->i[i + 1]; jb++)
1181         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1182     }
1183   }
1184   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1185   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1186   PetscCall(PetscFree(colidxs));
1187 
1188   /* fill in and store nonzero values */
1189   PetscCall(PetscMalloc1(nz, &matvals));
1190   for (cnt = 0, i = 0; i < A->mbs; i++) {
1191     for (k = 0; k < bs; k++) {
1192       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1193         if (garray[B->j[jb]] > cs / bs) break;
1194         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1195       }
1196       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1197         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1198       for (; jb < B->i[i + 1]; jb++)
1199         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1200     }
1201   }
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1203   PetscCall(PetscFree(matvals));
1204 
1205   /* write block size option to the viewer's .info file */
1206   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1207   PetscFunctionReturn(PETSC_SUCCESS);
1208 }
1209 
1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1211 {
1212   PetscBool iascii, isdraw, issocket, isbinary;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1219   if (iascii || isdraw || issocket) {
1220     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1221   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1222   PetscFunctionReturn(PETSC_SUCCESS);
1223 }
1224 
1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1226 {
1227   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1228   PetscInt     nt;
1229 
1230   PetscFunctionBegin;
1231   PetscCall(VecGetLocalSize(xx, &nt));
1232   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1233   PetscCall(VecGetLocalSize(yy, &nt));
1234   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1235   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1236   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1237   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1238   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1243 {
1244   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1249   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1251   PetscFunctionReturn(PETSC_SUCCESS);
1252 }
1253 
1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1257 
1258   PetscFunctionBegin;
1259   /* do nondiagonal part */
1260   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1261   /* do local part */
1262   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1263   /* add partial results together */
1264   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1265   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1270 {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272 
1273   PetscFunctionBegin;
1274   /* do nondiagonal part */
1275   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1276   /* do local part */
1277   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1278   /* add partial results together */
1279   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1280   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 /*
1285   This only works correctly for square matrices where the subblock A->A is the
1286    diagonal block
1287 */
1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1289 {
1290   PetscFunctionBegin;
1291   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1292   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1297 {
1298   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(MatScale(a->A, aa));
1302   PetscCall(MatScale(a->B, aa));
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1307 {
1308   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1309   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1310   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1311   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1312   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1313 
1314   PetscFunctionBegin;
1315   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1316   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1317   mat->getrowactive = PETSC_TRUE;
1318 
1319   if (!mat->rowvalues && (idx || v)) {
1320     /*
1321         allocate enough space to hold information from the longest row.
1322     */
1323     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1324     PetscInt     max = 1, mbs = mat->mbs, tmp;
1325     for (i = 0; i < mbs; i++) {
1326       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1327       if (max < tmp) max = tmp;
1328     }
1329     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1330   }
1331   lrow = row - brstart;
1332 
1333   pvA = &vworkA;
1334   pcA = &cworkA;
1335   pvB = &vworkB;
1336   pcB = &cworkB;
1337   if (!v) {
1338     pvA = NULL;
1339     pvB = NULL;
1340   }
1341   if (!idx) {
1342     pcA = NULL;
1343     if (!v) pcB = NULL;
1344   }
1345   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1346   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1347   nztot = nzA + nzB;
1348 
1349   cmap = mat->garray;
1350   if (v || idx) {
1351     if (nztot) {
1352       /* Sort by increasing column numbers, assuming A and B already sorted */
1353       PetscInt imark = -1;
1354       if (v) {
1355         *v = v_p = mat->rowvalues;
1356         for (i = 0; i < nzB; i++) {
1357           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1358           else break;
1359         }
1360         imark = i;
1361         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1362         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1363       }
1364       if (idx) {
1365         *idx = idx_p = mat->rowindices;
1366         if (imark > -1) {
1367           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1368         } else {
1369           for (i = 0; i < nzB; i++) {
1370             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1371             else break;
1372           }
1373           imark = i;
1374         }
1375         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1376         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1377       }
1378     } else {
1379       if (idx) *idx = NULL;
1380       if (v) *v = NULL;
1381     }
1382   }
1383   *nz = nztot;
1384   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1385   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1390 {
1391   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1395   baij->getrowactive = PETSC_FALSE;
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1400 {
1401   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1402 
1403   PetscFunctionBegin;
1404   PetscCall(MatZeroEntries(l->A));
1405   PetscCall(MatZeroEntries(l->B));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1410 {
1411   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1412   Mat            A = a->A, B = a->B;
1413   PetscLogDouble isend[5], irecv[5];
1414 
1415   PetscFunctionBegin;
1416   info->block_size = (PetscReal)matin->rmap->bs;
1417 
1418   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1419 
1420   isend[0] = info->nz_used;
1421   isend[1] = info->nz_allocated;
1422   isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;
1424   isend[4] = info->mallocs;
1425 
1426   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1427 
1428   isend[0] += info->nz_used;
1429   isend[1] += info->nz_allocated;
1430   isend[2] += info->nz_unneeded;
1431   isend[3] += info->memory;
1432   isend[4] += info->mallocs;
1433 
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else if (flag == MAT_GLOBAL_SUM) {
1449     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1450 
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
1463 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1464 {
1465   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1466 
1467   PetscFunctionBegin;
1468   switch (op) {
1469   case MAT_NEW_NONZERO_LOCATIONS:
1470   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1471   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1472   case MAT_KEEP_NONZERO_PATTERN:
1473   case MAT_NEW_NONZERO_LOCATION_ERR:
1474     MatCheckPreallocated(A, 1);
1475     PetscCall(MatSetOption(a->A, op, flg));
1476     PetscCall(MatSetOption(a->B, op, flg));
1477     break;
1478   case MAT_ROW_ORIENTED:
1479     MatCheckPreallocated(A, 1);
1480     a->roworiented = flg;
1481 
1482     PetscCall(MatSetOption(a->A, op, flg));
1483     PetscCall(MatSetOption(a->B, op, flg));
1484     break;
1485   case MAT_FORCE_DIAGONAL_ENTRIES:
1486   case MAT_SORTED_FULL:
1487     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1488     break;
1489   case MAT_IGNORE_OFF_PROC_ENTRIES:
1490     a->donotstash = flg;
1491     break;
1492   case MAT_USE_HASH_TABLE:
1493     a->ht_flag = flg;
1494     a->ht_fact = 1.39;
1495     break;
1496   case MAT_SYMMETRIC:
1497   case MAT_STRUCTURALLY_SYMMETRIC:
1498   case MAT_HERMITIAN:
1499   case MAT_SUBMAT_SINGLEIS:
1500   case MAT_SYMMETRY_ETERNAL:
1501   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1502   case MAT_SPD_ETERNAL:
1503     /* if the diagonal matrix is square it inherits some of the properties above */
1504     break;
1505   default:
1506     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1507   }
1508   PetscFunctionReturn(PETSC_SUCCESS);
1509 }
1510 
1511 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1512 {
1513   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1514   Mat_SeqBAIJ *Aloc;
1515   Mat          B;
1516   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1517   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1518   MatScalar   *a;
1519 
1520   PetscFunctionBegin;
1521   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1522   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1523     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1524     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1525     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1526     /* Do not know preallocation information, but must set block size */
1527     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1528   } else {
1529     B = *matout;
1530   }
1531 
1532   /* copy over the A part */
1533   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1534   ai   = Aloc->i;
1535   aj   = Aloc->j;
1536   a    = Aloc->a;
1537   PetscCall(PetscMalloc1(bs, &rvals));
1538 
1539   for (i = 0; i < mbs; i++) {
1540     rvals[0] = bs * (baij->rstartbs + i);
1541     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1542     for (j = ai[i]; j < ai[i + 1]; j++) {
1543       col = (baij->cstartbs + aj[j]) * bs;
1544       for (k = 0; k < bs; k++) {
1545         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1546 
1547         col++;
1548         a += bs;
1549       }
1550     }
1551   }
1552   /* copy over the B part */
1553   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1554   ai   = Aloc->i;
1555   aj   = Aloc->j;
1556   a    = Aloc->a;
1557   for (i = 0; i < mbs; i++) {
1558     rvals[0] = bs * (baij->rstartbs + i);
1559     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1560     for (j = ai[i]; j < ai[i + 1]; j++) {
1561       col = baij->garray[aj[j]] * bs;
1562       for (k = 0; k < bs; k++) {
1563         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1564         col++;
1565         a += bs;
1566       }
1567     }
1568   }
1569   PetscCall(PetscFree(rvals));
1570   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1572 
1573   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1574   else PetscCall(MatHeaderMerge(A, &B));
1575   PetscFunctionReturn(PETSC_SUCCESS);
1576 }
1577 
1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1579 {
1580   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1581   Mat          a = baij->A, b = baij->B;
1582   PetscInt     s1, s2, s3;
1583 
1584   PetscFunctionBegin;
1585   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1586   if (rr) {
1587     PetscCall(VecGetLocalSize(rr, &s1));
1588     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1589     /* Overlap communication with computation. */
1590     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1591   }
1592   if (ll) {
1593     PetscCall(VecGetLocalSize(ll, &s1));
1594     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1595     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596   }
1597   /* scale  the diagonal block */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   if (rr) {
1601     /* Do a scatter end and then right scale the off-diagonal block */
1602     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1603     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1604   }
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1609 {
1610   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1611   PetscInt    *lrows;
1612   PetscInt     r, len;
1613   PetscBool    cong;
1614 
1615   PetscFunctionBegin;
1616   /* get locally owned rows */
1617   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1618   /* fix right hand side if needed */
1619   if (x && b) {
1620     const PetscScalar *xx;
1621     PetscScalar       *bb;
1622 
1623     PetscCall(VecGetArrayRead(x, &xx));
1624     PetscCall(VecGetArray(b, &bb));
1625     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1626     PetscCall(VecRestoreArrayRead(x, &xx));
1627     PetscCall(VecRestoreArray(b, &bb));
1628   }
1629 
1630   /* actually zap the local rows */
1631   /*
1632         Zero the required rows. If the "diagonal block" of the matrix
1633      is square and the user wishes to set the diagonal we use separate
1634      code so that MatSetValues() is not called for each diagonal allocating
1635      new memory, thus calling lots of mallocs and slowing things down.
1636 
1637   */
1638   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1639   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1640   PetscCall(MatHasCongruentLayouts(A, &cong));
1641   if ((diag != 0.0) && cong) {
1642     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1643   } else if (diag != 0.0) {
1644     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1645     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1646     for (r = 0; r < len; ++r) {
1647       const PetscInt row = lrows[r] + A->rmap->rstart;
1648       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1649     }
1650     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1651     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1652   } else {
1653     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1654   }
1655   PetscCall(PetscFree(lrows));
1656 
1657   /* only change matrix nonzero state if pattern was allowed to be changed */
1658   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1659     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1660     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1661   }
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1666 {
1667   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1668   PetscMPIInt        n = A->rmap->n, p = 0;
1669   PetscInt           i, j, k, r, len = 0, row, col, count;
1670   PetscInt          *lrows, *owners = A->rmap->range;
1671   PetscSFNode       *rrows;
1672   PetscSF            sf;
1673   const PetscScalar *xx;
1674   PetscScalar       *bb, *mask;
1675   Vec                xmask, lmask;
1676   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1677   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1678   PetscScalar       *aa;
1679 
1680   PetscFunctionBegin;
1681   /* Create SF where leaves are input rows and roots are owned rows */
1682   PetscCall(PetscMalloc1(n, &lrows));
1683   for (r = 0; r < n; ++r) lrows[r] = -1;
1684   PetscCall(PetscMalloc1(N, &rrows));
1685   for (r = 0; r < N; ++r) {
1686     const PetscInt idx = rows[r];
1687     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1688     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1689       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1690     }
1691     rrows[r].rank  = p;
1692     rrows[r].index = rows[r] - owners[p];
1693   }
1694   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1695   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1696   /* Collect flags for rows to be zeroed */
1697   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1698   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1699   PetscCall(PetscSFDestroy(&sf));
1700   /* Compress and put in row numbers */
1701   for (r = 0; r < n; ++r)
1702     if (lrows[r] >= 0) lrows[len++] = r;
1703   /* zero diagonal part of matrix */
1704   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1705   /* handle off-diagonal part of matrix */
1706   PetscCall(MatCreateVecs(A, &xmask, NULL));
1707   PetscCall(VecDuplicate(l->lvec, &lmask));
1708   PetscCall(VecGetArray(xmask, &bb));
1709   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1710   PetscCall(VecRestoreArray(xmask, &bb));
1711   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1712   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1713   PetscCall(VecDestroy(&xmask));
1714   if (x) {
1715     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1716     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1717     PetscCall(VecGetArrayRead(l->lvec, &xx));
1718     PetscCall(VecGetArray(b, &bb));
1719   }
1720   PetscCall(VecGetArray(lmask, &mask));
1721   /* remove zeroed rows of off-diagonal matrix */
1722   for (i = 0; i < len; ++i) {
1723     row   = lrows[i];
1724     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1725     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1726     for (k = 0; k < count; ++k) {
1727       aa[0] = 0.0;
1728       aa += bs;
1729     }
1730   }
1731   /* loop over all elements of off process part of matrix zeroing removed columns*/
1732   for (i = 0; i < l->B->rmap->N; ++i) {
1733     row = i / bs;
1734     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1735       for (k = 0; k < bs; ++k) {
1736         col = bs * baij->j[j] + k;
1737         if (PetscAbsScalar(mask[col])) {
1738           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1739           if (x) bb[i] -= aa[0] * xx[col];
1740           aa[0] = 0.0;
1741         }
1742       }
1743     }
1744   }
1745   if (x) {
1746     PetscCall(VecRestoreArray(b, &bb));
1747     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1748   }
1749   PetscCall(VecRestoreArray(lmask, &mask));
1750   PetscCall(VecDestroy(&lmask));
1751   PetscCall(PetscFree(lrows));
1752 
1753   /* only change matrix nonzero state if pattern was allowed to be changed */
1754   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1755     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1756     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1757   }
1758   PetscFunctionReturn(PETSC_SUCCESS);
1759 }
1760 
1761 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1762 {
1763   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1764 
1765   PetscFunctionBegin;
1766   PetscCall(MatSetUnfactored(a->A));
1767   PetscFunctionReturn(PETSC_SUCCESS);
1768 }
1769 
1770 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1771 
1772 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1773 {
1774   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1775   Mat          a, b, c, d;
1776   PetscBool    flg;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A;
1780   b = matA->B;
1781   c = matB->A;
1782   d = matB->B;
1783 
1784   PetscCall(MatEqual(a, c, &flg));
1785   if (flg) PetscCall(MatEqual(b, d, &flg));
1786   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1787   PetscFunctionReturn(PETSC_SUCCESS);
1788 }
1789 
1790 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1791 {
1792   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1793   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1794 
1795   PetscFunctionBegin;
1796   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1797   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1798     PetscCall(MatCopy_Basic(A, B, str));
1799   } else {
1800     PetscCall(MatCopy(a->A, b->A, str));
1801     PetscCall(MatCopy(a->B, b->B, str));
1802   }
1803   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1808 {
1809   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1810   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1811   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1812 
1813   PetscFunctionBegin;
1814   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1815   PetscFunctionReturn(PETSC_SUCCESS);
1816 }
1817 
1818 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1819 {
1820   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1821   PetscBLASInt bnz, one                         = 1;
1822   Mat_SeqBAIJ *x, *y;
1823   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1824 
1825   PetscFunctionBegin;
1826   if (str == SAME_NONZERO_PATTERN) {
1827     PetscScalar alpha = a;
1828     x                 = (Mat_SeqBAIJ *)xx->A->data;
1829     y                 = (Mat_SeqBAIJ *)yy->A->data;
1830     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1831     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1832     x = (Mat_SeqBAIJ *)xx->B->data;
1833     y = (Mat_SeqBAIJ *)yy->B->data;
1834     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1835     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1836     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1837   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1838     PetscCall(MatAXPY_Basic(Y, a, X, str));
1839   } else {
1840     Mat       B;
1841     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1842     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1843     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1844     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1845     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1846     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1847     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1848     PetscCall(MatSetType(B, MATMPIBAIJ));
1849     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1850     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1851     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1852     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1853     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1854     PetscCall(MatHeaderMerge(Y, &B));
1855     PetscCall(PetscFree(nnz_d));
1856     PetscCall(PetscFree(nnz_o));
1857   }
1858   PetscFunctionReturn(PETSC_SUCCESS);
1859 }
1860 
1861 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1862 {
1863   PetscFunctionBegin;
1864   if (PetscDefined(USE_COMPLEX)) {
1865     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1866 
1867     PetscCall(MatConjugate_SeqBAIJ(a->A));
1868     PetscCall(MatConjugate_SeqBAIJ(a->B));
1869   }
1870   PetscFunctionReturn(PETSC_SUCCESS);
1871 }
1872 
1873 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1874 {
1875   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1876 
1877   PetscFunctionBegin;
1878   PetscCall(MatRealPart(a->A));
1879   PetscCall(MatRealPart(a->B));
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1884 {
1885   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1886 
1887   PetscFunctionBegin;
1888   PetscCall(MatImaginaryPart(a->A));
1889   PetscCall(MatImaginaryPart(a->B));
1890   PetscFunctionReturn(PETSC_SUCCESS);
1891 }
1892 
1893 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1894 {
1895   IS       iscol_local;
1896   PetscInt csize;
1897 
1898   PetscFunctionBegin;
1899   PetscCall(ISGetLocalSize(iscol, &csize));
1900   if (call == MAT_REUSE_MATRIX) {
1901     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1902     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1903   } else {
1904     PetscCall(ISAllGather(iscol, &iscol_local));
1905   }
1906   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1907   if (call == MAT_INITIAL_MATRIX) {
1908     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1909     PetscCall(ISDestroy(&iscol_local));
1910   }
1911   PetscFunctionReturn(PETSC_SUCCESS);
1912 }
1913 
1914 /*
1915   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1916   in local and then by concatenating the local matrices the end result.
1917   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1918   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1919 */
1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1921 {
1922   PetscMPIInt  rank, size;
1923   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1924   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1925   Mat          M, Mreuse;
1926   MatScalar   *vwork, *aa;
1927   MPI_Comm     comm;
1928   IS           isrow_new, iscol_new;
1929   Mat_SeqBAIJ *aij;
1930 
1931   PetscFunctionBegin;
1932   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1933   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1934   PetscCallMPI(MPI_Comm_size(comm, &size));
1935   /* The compression and expansion should be avoided. Doesn't point
1936      out errors, might change the indices, hence buggey */
1937   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1938   if (isrow == iscol) {
1939     iscol_new = isrow_new;
1940     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1941   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1942 
1943   if (call == MAT_REUSE_MATRIX) {
1944     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1945     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1946     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1947   } else {
1948     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1949   }
1950   PetscCall(ISDestroy(&isrow_new));
1951   PetscCall(ISDestroy(&iscol_new));
1952   /*
1953       m - number of local rows
1954       n - number of columns (same on all processors)
1955       rstart - first row in new global matrix generated
1956   */
1957   PetscCall(MatGetBlockSize(mat, &bs));
1958   PetscCall(MatGetSize(Mreuse, &m, &n));
1959   m = m / bs;
1960   n = n / bs;
1961 
1962   if (call == MAT_INITIAL_MATRIX) {
1963     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1964     ii  = aij->i;
1965     jj  = aij->j;
1966 
1967     /*
1968         Determine the number of non-zeros in the diagonal and off-diagonal
1969         portions of the matrix in order to do correct preallocation
1970     */
1971 
1972     /* first get start and end of "diagonal" columns */
1973     if (csize == PETSC_DECIDE) {
1974       PetscCall(ISGetSize(isrow, &mglobal));
1975       if (mglobal == n * bs) { /* square matrix */
1976         nlocal = m;
1977       } else {
1978         nlocal = n / size + ((n % size) > rank);
1979       }
1980     } else {
1981       nlocal = csize / bs;
1982     }
1983     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1984     rstart = rend - nlocal;
1985     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1986 
1987     /* next, compute all the lengths */
1988     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1989     for (i = 0; i < m; i++) {
1990       jend = ii[i + 1] - ii[i];
1991       olen = 0;
1992       dlen = 0;
1993       for (j = 0; j < jend; j++) {
1994         if (*jj < rstart || *jj >= rend) olen++;
1995         else dlen++;
1996         jj++;
1997       }
1998       olens[i] = olen;
1999       dlens[i] = dlen;
2000     }
2001     PetscCall(MatCreate(comm, &M));
2002     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2003     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2004     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2005     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2006     PetscCall(PetscFree2(dlens, olens));
2007   } else {
2008     PetscInt ml, nl;
2009 
2010     M = *newmat;
2011     PetscCall(MatGetLocalSize(M, &ml, &nl));
2012     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2013     PetscCall(MatZeroEntries(M));
2014     /*
2015          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2016        rather than the slower MatSetValues().
2017     */
2018     M->was_assembled = PETSC_TRUE;
2019     M->assembled     = PETSC_FALSE;
2020   }
2021   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2022   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2023   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2024   ii  = aij->i;
2025   jj  = aij->j;
2026   aa  = aij->a;
2027   for (i = 0; i < m; i++) {
2028     row   = rstart / bs + i;
2029     nz    = ii[i + 1] - ii[i];
2030     cwork = jj;
2031     jj    = PetscSafePointerPlusOffset(jj, nz);
2032     vwork = aa;
2033     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2034     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2035   }
2036 
2037   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2038   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2039   *newmat = M;
2040 
2041   /* save submatrix used in processor for next request */
2042   if (call == MAT_INITIAL_MATRIX) {
2043     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2044     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2045   }
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2050 {
2051   MPI_Comm        comm, pcomm;
2052   PetscInt        clocal_size, nrows;
2053   const PetscInt *rows;
2054   PetscMPIInt     size;
2055   IS              crowp, lcolp;
2056 
2057   PetscFunctionBegin;
2058   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2059   /* make a collective version of 'rowp' */
2060   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2061   if (pcomm == comm) {
2062     crowp = rowp;
2063   } else {
2064     PetscCall(ISGetSize(rowp, &nrows));
2065     PetscCall(ISGetIndices(rowp, &rows));
2066     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2067     PetscCall(ISRestoreIndices(rowp, &rows));
2068   }
2069   PetscCall(ISSetPermutation(crowp));
2070   /* make a local version of 'colp' */
2071   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2072   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2073   if (size == 1) {
2074     lcolp = colp;
2075   } else {
2076     PetscCall(ISAllGather(colp, &lcolp));
2077   }
2078   PetscCall(ISSetPermutation(lcolp));
2079   /* now we just get the submatrix */
2080   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2081   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2082   /* clean up */
2083   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2084   if (size > 1) PetscCall(ISDestroy(&lcolp));
2085   PetscFunctionReturn(PETSC_SUCCESS);
2086 }
2087 
2088 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2089 {
2090   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2091   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2092 
2093   PetscFunctionBegin;
2094   if (nghosts) *nghosts = B->nbs;
2095   if (ghosts) *ghosts = baij->garray;
2096   PetscFunctionReturn(PETSC_SUCCESS);
2097 }
2098 
2099 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2100 {
2101   Mat          B;
2102   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2103   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2104   Mat_SeqAIJ  *b;
2105   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2106   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2107   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2108 
2109   PetscFunctionBegin;
2110   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2111   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2112 
2113   /*   Tell every processor the number of nonzeros per row  */
2114   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2115   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2116   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2117   displs = recvcounts + size;
2118   for (i = 0; i < size; i++) {
2119     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2120     displs[i]     = A->rmap->range[i] / bs;
2121   }
2122   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2123   /* Create the sequential matrix of the same type as the local block diagonal  */
2124   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2125   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2126   PetscCall(MatSetType(B, MATSEQAIJ));
2127   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2128   b = (Mat_SeqAIJ *)B->data;
2129 
2130   /*     Copy my part of matrix column indices over  */
2131   sendcount  = ad->nz + bd->nz;
2132   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2133   a_jsendbuf = ad->j;
2134   b_jsendbuf = bd->j;
2135   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2136   cnt        = 0;
2137   for (i = 0; i < n; i++) {
2138     /* put in lower diagonal portion */
2139     m = bd->i[i + 1] - bd->i[i];
2140     while (m > 0) {
2141       /* is it above diagonal (in bd (compressed) numbering) */
2142       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2143       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2144       m--;
2145     }
2146 
2147     /* put in diagonal portion */
2148     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2149 
2150     /* put in upper diagonal portion */
2151     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2152   }
2153   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2154 
2155   /*  Gather all column indices to all processors  */
2156   for (i = 0; i < size; i++) {
2157     recvcounts[i] = 0;
2158     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2159   }
2160   displs[0] = 0;
2161   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2162   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2163   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2164   /* set the b->ilen (length of each row) values */
2165   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2166   /* set the b->i indices */
2167   b->i[0] = 0;
2168   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2169   PetscCall(PetscFree(lens));
2170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2172   PetscCall(PetscFree(recvcounts));
2173 
2174   PetscCall(MatPropagateSymmetryOptions(A, B));
2175   *newmat = B;
2176   PetscFunctionReturn(PETSC_SUCCESS);
2177 }
2178 
2179 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2180 {
2181   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2182   Vec          bb1 = NULL;
2183 
2184   PetscFunctionBegin;
2185   if (flag == SOR_APPLY_UPPER) {
2186     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2187     PetscFunctionReturn(PETSC_SUCCESS);
2188   }
2189 
2190   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2191 
2192   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2193     if (flag & SOR_ZERO_INITIAL_GUESS) {
2194       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2195       its--;
2196     }
2197 
2198     while (its--) {
2199       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2200       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2201 
2202       /* update rhs: bb1 = bb - B*x */
2203       PetscCall(VecScale(mat->lvec, -1.0));
2204       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2205 
2206       /* local sweep */
2207       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2208     }
2209   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2210     if (flag & SOR_ZERO_INITIAL_GUESS) {
2211       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2212       its--;
2213     }
2214     while (its--) {
2215       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2216       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2217 
2218       /* update rhs: bb1 = bb - B*x */
2219       PetscCall(VecScale(mat->lvec, -1.0));
2220       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2221 
2222       /* local sweep */
2223       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2224     }
2225   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2226     if (flag & SOR_ZERO_INITIAL_GUESS) {
2227       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2228       its--;
2229     }
2230     while (its--) {
2231       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2232       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2233 
2234       /* update rhs: bb1 = bb - B*x */
2235       PetscCall(VecScale(mat->lvec, -1.0));
2236       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2237 
2238       /* local sweep */
2239       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2240     }
2241   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2242 
2243   PetscCall(VecDestroy(&bb1));
2244   PetscFunctionReturn(PETSC_SUCCESS);
2245 }
2246 
2247 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2248 {
2249   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2250   PetscInt     m, N, i, *garray = aij->garray;
2251   PetscInt     ib, jb, bs = A->rmap->bs;
2252   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2253   MatScalar   *a_val = a_aij->a;
2254   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2255   MatScalar   *b_val = b_aij->a;
2256   PetscReal   *work;
2257 
2258   PetscFunctionBegin;
2259   PetscCall(MatGetSize(A, &m, &N));
2260   PetscCall(PetscCalloc1(N, &work));
2261   if (type == NORM_2) {
2262     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2263       for (jb = 0; jb < bs; jb++) {
2264         for (ib = 0; ib < bs; ib++) {
2265           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2266           a_val++;
2267         }
2268       }
2269     }
2270     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2271       for (jb = 0; jb < bs; jb++) {
2272         for (ib = 0; ib < bs; ib++) {
2273           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2274           b_val++;
2275         }
2276       }
2277     }
2278   } else if (type == NORM_1) {
2279     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2280       for (jb = 0; jb < bs; jb++) {
2281         for (ib = 0; ib < bs; ib++) {
2282           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2283           a_val++;
2284         }
2285       }
2286     }
2287     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2288       for (jb = 0; jb < bs; jb++) {
2289         for (ib = 0; ib < bs; ib++) {
2290           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2291           b_val++;
2292         }
2293       }
2294     }
2295   } else if (type == NORM_INFINITY) {
2296     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2297       for (jb = 0; jb < bs; jb++) {
2298         for (ib = 0; ib < bs; ib++) {
2299           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2300           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2301           a_val++;
2302         }
2303       }
2304     }
2305     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2306       for (jb = 0; jb < bs; jb++) {
2307         for (ib = 0; ib < bs; ib++) {
2308           int col   = garray[b_aij->j[i]] * bs + jb;
2309           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2310           b_val++;
2311         }
2312       }
2313     }
2314   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2315     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2316       for (jb = 0; jb < bs; jb++) {
2317         for (ib = 0; ib < bs; ib++) {
2318           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2319           a_val++;
2320         }
2321       }
2322     }
2323     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2324       for (jb = 0; jb < bs; jb++) {
2325         for (ib = 0; ib < bs; ib++) {
2326           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2327           b_val++;
2328         }
2329       }
2330     }
2331   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2332     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2333       for (jb = 0; jb < bs; jb++) {
2334         for (ib = 0; ib < bs; ib++) {
2335           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2336           a_val++;
2337         }
2338       }
2339     }
2340     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2341       for (jb = 0; jb < bs; jb++) {
2342         for (ib = 0; ib < bs; ib++) {
2343           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2344           b_val++;
2345         }
2346       }
2347     }
2348   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2349   if (type == NORM_INFINITY) {
2350     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2351   } else {
2352     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2353   }
2354   PetscCall(PetscFree(work));
2355   if (type == NORM_2) {
2356     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2357   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2358     for (i = 0; i < N; i++) reductions[i] /= m;
2359   }
2360   PetscFunctionReturn(PETSC_SUCCESS);
2361 }
2362 
2363 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2364 {
2365   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2366 
2367   PetscFunctionBegin;
2368   PetscCall(MatInvertBlockDiagonal(a->A, values));
2369   A->factorerrortype             = a->A->factorerrortype;
2370   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2371   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2372   PetscFunctionReturn(PETSC_SUCCESS);
2373 }
2374 
2375 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2376 {
2377   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2378   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2379 
2380   PetscFunctionBegin;
2381   if (!Y->preallocated) {
2382     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2383   } else if (!aij->nz) {
2384     PetscInt nonew = aij->nonew;
2385     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2386     aij->nonew = nonew;
2387   }
2388   PetscCall(MatShift_Basic(Y, a));
2389   PetscFunctionReturn(PETSC_SUCCESS);
2390 }
2391 
2392 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2393 {
2394   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2395 
2396   PetscFunctionBegin;
2397   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2398   PetscCall(MatMissingDiagonal(a->A, missing, d));
2399   if (d) {
2400     PetscInt rstart;
2401     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2402     *d += rstart / A->rmap->bs;
2403   }
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2408 {
2409   PetscFunctionBegin;
2410   *a = ((Mat_MPIBAIJ *)A->data)->A;
2411   PetscFunctionReturn(PETSC_SUCCESS);
2412 }
2413 
2414 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2415 {
2416   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2417 
2418   PetscFunctionBegin;
2419   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2420   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2421   PetscFunctionReturn(PETSC_SUCCESS);
2422 }
2423 
2424 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2425                                        MatGetRow_MPIBAIJ,
2426                                        MatRestoreRow_MPIBAIJ,
2427                                        MatMult_MPIBAIJ,
2428                                        /* 4*/ MatMultAdd_MPIBAIJ,
2429                                        MatMultTranspose_MPIBAIJ,
2430                                        MatMultTransposeAdd_MPIBAIJ,
2431                                        NULL,
2432                                        NULL,
2433                                        NULL,
2434                                        /*10*/ NULL,
2435                                        NULL,
2436                                        NULL,
2437                                        MatSOR_MPIBAIJ,
2438                                        MatTranspose_MPIBAIJ,
2439                                        /*15*/ MatGetInfo_MPIBAIJ,
2440                                        MatEqual_MPIBAIJ,
2441                                        MatGetDiagonal_MPIBAIJ,
2442                                        MatDiagonalScale_MPIBAIJ,
2443                                        MatNorm_MPIBAIJ,
2444                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2445                                        MatAssemblyEnd_MPIBAIJ,
2446                                        MatSetOption_MPIBAIJ,
2447                                        MatZeroEntries_MPIBAIJ,
2448                                        /*24*/ MatZeroRows_MPIBAIJ,
2449                                        NULL,
2450                                        NULL,
2451                                        NULL,
2452                                        NULL,
2453                                        /*29*/ MatSetUp_MPI_Hash,
2454                                        NULL,
2455                                        NULL,
2456                                        MatGetDiagonalBlock_MPIBAIJ,
2457                                        NULL,
2458                                        /*34*/ MatDuplicate_MPIBAIJ,
2459                                        NULL,
2460                                        NULL,
2461                                        NULL,
2462                                        NULL,
2463                                        /*39*/ MatAXPY_MPIBAIJ,
2464                                        MatCreateSubMatrices_MPIBAIJ,
2465                                        MatIncreaseOverlap_MPIBAIJ,
2466                                        MatGetValues_MPIBAIJ,
2467                                        MatCopy_MPIBAIJ,
2468                                        /*44*/ NULL,
2469                                        MatScale_MPIBAIJ,
2470                                        MatShift_MPIBAIJ,
2471                                        NULL,
2472                                        MatZeroRowsColumns_MPIBAIJ,
2473                                        /*49*/ NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        NULL,
2477                                        NULL,
2478                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2479                                        NULL,
2480                                        MatSetUnfactored_MPIBAIJ,
2481                                        MatPermute_MPIBAIJ,
2482                                        MatSetValuesBlocked_MPIBAIJ,
2483                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2484                                        MatDestroy_MPIBAIJ,
2485                                        MatView_MPIBAIJ,
2486                                        NULL,
2487                                        NULL,
2488                                        /*64*/ NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        /*74*/ NULL,
2499                                        MatFDColoringApply_BAIJ,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        /*79*/ NULL,
2504                                        NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        MatLoad_MPIBAIJ,
2508                                        /*84*/ NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        NULL,
2513                                        /*89*/ NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        /*94*/ NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        /*99*/ NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        MatConjugate_MPIBAIJ,
2527                                        NULL,
2528                                        /*104*/ NULL,
2529                                        MatRealPart_MPIBAIJ,
2530                                        MatImaginaryPart_MPIBAIJ,
2531                                        NULL,
2532                                        NULL,
2533                                        /*109*/ NULL,
2534                                        NULL,
2535                                        NULL,
2536                                        NULL,
2537                                        MatMissingDiagonal_MPIBAIJ,
2538                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2539                                        NULL,
2540                                        MatGetGhosts_MPIBAIJ,
2541                                        NULL,
2542                                        NULL,
2543                                        /*119*/ NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        NULL,
2547                                        MatGetMultiProcBlock_MPIBAIJ,
2548                                        /*124*/ NULL,
2549                                        MatGetColumnReductions_MPIBAIJ,
2550                                        MatInvertBlockDiagonal_MPIBAIJ,
2551                                        NULL,
2552                                        NULL,
2553                                        /*129*/ NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        NULL,
2558                                        /*134*/ NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        /*139*/ MatSetBlockSizes_Default,
2564                                        NULL,
2565                                        NULL,
2566                                        MatFDColoringSetUp_MPIXAIJ,
2567                                        NULL,
2568                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        NULL,
2573                                        NULL,
2574                                        /*150*/ NULL,
2575                                        MatEliminateZeros_MPIBAIJ,
2576                                        NULL};
2577 
2578 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2579 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2580 
2581 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2582 {
2583   PetscInt        m, rstart, cstart, cend;
2584   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2585   const PetscInt *JJ          = NULL;
2586   PetscScalar    *values      = NULL;
2587   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2588   PetscBool       nooffprocentries;
2589 
2590   PetscFunctionBegin;
2591   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2592   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2593   PetscCall(PetscLayoutSetUp(B->rmap));
2594   PetscCall(PetscLayoutSetUp(B->cmap));
2595   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2596   m      = B->rmap->n / bs;
2597   rstart = B->rmap->rstart / bs;
2598   cstart = B->cmap->rstart / bs;
2599   cend   = B->cmap->rend / bs;
2600 
2601   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2602   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2603   for (i = 0; i < m; i++) {
2604     nz = ii[i + 1] - ii[i];
2605     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2606     nz_max = PetscMax(nz_max, nz);
2607     dlen   = 0;
2608     olen   = 0;
2609     JJ     = jj + ii[i];
2610     for (j = 0; j < nz; j++) {
2611       if (*JJ < cstart || *JJ >= cend) olen++;
2612       else dlen++;
2613       JJ++;
2614     }
2615     d_nnz[i] = dlen;
2616     o_nnz[i] = olen;
2617   }
2618   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2619   PetscCall(PetscFree2(d_nnz, o_nnz));
2620 
2621   values = (PetscScalar *)V;
2622   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2623   for (i = 0; i < m; i++) {
2624     PetscInt        row   = i + rstart;
2625     PetscInt        ncols = ii[i + 1] - ii[i];
2626     const PetscInt *icols = jj + ii[i];
2627     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2628       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2629       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2630     } else { /* block ordering does not match so we can only insert one block at a time. */
2631       PetscInt j;
2632       for (j = 0; j < ncols; j++) {
2633         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2634         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2635       }
2636     }
2637   }
2638 
2639   if (!V) PetscCall(PetscFree(values));
2640   nooffprocentries    = B->nooffprocentries;
2641   B->nooffprocentries = PETSC_TRUE;
2642   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2643   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2644   B->nooffprocentries = nooffprocentries;
2645 
2646   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2647   PetscFunctionReturn(PETSC_SUCCESS);
2648 }
2649 
2650 /*@C
2651   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2652 
2653   Collective
2654 
2655   Input Parameters:
2656 + B  - the matrix
2657 . bs - the block size
2658 . i  - the indices into `j` for the start of each local row (starts with zero)
2659 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2660 - v  - optional values in the matrix, use `NULL` if not provided
2661 
2662   Level: advanced
2663 
2664   Notes:
2665   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2666   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2667   called this routine.
2668 
2669   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2670   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2671   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2672   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2673   block column and the second index is over columns within a block.
2674 
2675   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2676 
2677 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2678 @*/
2679 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2680 {
2681   PetscFunctionBegin;
2682   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2683   PetscValidType(B, 1);
2684   PetscValidLogicalCollectiveInt(B, bs, 2);
2685   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2690 {
2691   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2692   PetscInt     i;
2693   PetscMPIInt  size;
2694 
2695   PetscFunctionBegin;
2696   if (B->hash_active) {
2697     B->ops[0]      = b->cops;
2698     B->hash_active = PETSC_FALSE;
2699   }
2700   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2701   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2702   PetscCall(PetscLayoutSetUp(B->rmap));
2703   PetscCall(PetscLayoutSetUp(B->cmap));
2704   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2705 
2706   if (d_nnz) {
2707     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2708   }
2709   if (o_nnz) {
2710     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2711   }
2712 
2713   b->bs2 = bs * bs;
2714   b->mbs = B->rmap->n / bs;
2715   b->nbs = B->cmap->n / bs;
2716   b->Mbs = B->rmap->N / bs;
2717   b->Nbs = B->cmap->N / bs;
2718 
2719   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2720   b->rstartbs = B->rmap->rstart / bs;
2721   b->rendbs   = B->rmap->rend / bs;
2722   b->cstartbs = B->cmap->rstart / bs;
2723   b->cendbs   = B->cmap->rend / bs;
2724 
2725 #if defined(PETSC_USE_CTABLE)
2726   PetscCall(PetscHMapIDestroy(&b->colmap));
2727 #else
2728   PetscCall(PetscFree(b->colmap));
2729 #endif
2730   PetscCall(PetscFree(b->garray));
2731   PetscCall(VecDestroy(&b->lvec));
2732   PetscCall(VecScatterDestroy(&b->Mvctx));
2733 
2734   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2735 
2736   MatSeqXAIJGetOptions_Private(b->B);
2737   PetscCall(MatDestroy(&b->B));
2738   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2739   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2740   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2741   MatSeqXAIJRestoreOptions_Private(b->B);
2742 
2743   MatSeqXAIJGetOptions_Private(b->A);
2744   PetscCall(MatDestroy(&b->A));
2745   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2746   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2747   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2748   MatSeqXAIJRestoreOptions_Private(b->A);
2749 
2750   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2751   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2752   B->preallocated  = PETSC_TRUE;
2753   B->was_assembled = PETSC_FALSE;
2754   B->assembled     = PETSC_FALSE;
2755   PetscFunctionReturn(PETSC_SUCCESS);
2756 }
2757 
2758 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2759 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2760 
2761 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2762 {
2763   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2764   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2765   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2766   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2767 
2768   PetscFunctionBegin;
2769   PetscCall(PetscMalloc1(M + 1, &ii));
2770   ii[0] = 0;
2771   for (i = 0; i < M; i++) {
2772     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2773     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2774     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2775     /* remove one from count of matrix has diagonal */
2776     for (j = id[i]; j < id[i + 1]; j++) {
2777       if (jd[j] == i) {
2778         ii[i + 1]--;
2779         break;
2780       }
2781     }
2782   }
2783   PetscCall(PetscMalloc1(ii[M], &jj));
2784   cnt = 0;
2785   for (i = 0; i < M; i++) {
2786     for (j = io[i]; j < io[i + 1]; j++) {
2787       if (garray[jo[j]] > rstart) break;
2788       jj[cnt++] = garray[jo[j]];
2789     }
2790     for (k = id[i]; k < id[i + 1]; k++) {
2791       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2792     }
2793     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2794   }
2795   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2796   PetscFunctionReturn(PETSC_SUCCESS);
2797 }
2798 
2799 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2800 
2801 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2802 
2803 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2804 {
2805   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2806   Mat_MPIAIJ  *b;
2807   Mat          B;
2808 
2809   PetscFunctionBegin;
2810   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2811 
2812   if (reuse == MAT_REUSE_MATRIX) {
2813     B = *newmat;
2814   } else {
2815     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2816     PetscCall(MatSetType(B, MATMPIAIJ));
2817     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2818     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2819     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2820     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2821   }
2822   b = (Mat_MPIAIJ *)B->data;
2823 
2824   if (reuse == MAT_REUSE_MATRIX) {
2825     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2826     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2827   } else {
2828     PetscInt   *garray = a->garray;
2829     Mat_SeqAIJ *bB;
2830     PetscInt    bs, nnz;
2831     PetscCall(MatDestroy(&b->A));
2832     PetscCall(MatDestroy(&b->B));
2833     /* just clear out the data structure */
2834     PetscCall(MatDisAssemble_MPIAIJ(B));
2835     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2836     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2837 
2838     /* Global numbering for b->B columns */
2839     bB  = (Mat_SeqAIJ *)b->B->data;
2840     bs  = A->rmap->bs;
2841     nnz = bB->i[A->rmap->n];
2842     for (PetscInt k = 0; k < nnz; k++) {
2843       PetscInt bj = bB->j[k] / bs;
2844       PetscInt br = bB->j[k] % bs;
2845       bB->j[k]    = garray[bj] * bs + br;
2846     }
2847   }
2848   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2849   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2850 
2851   if (reuse == MAT_INPLACE_MATRIX) {
2852     PetscCall(MatHeaderReplace(A, &B));
2853   } else {
2854     *newmat = B;
2855   }
2856   PetscFunctionReturn(PETSC_SUCCESS);
2857 }
2858 
2859 /*MC
2860    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2861 
2862    Options Database Keys:
2863 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2864 . -mat_block_size <bs> - set the blocksize used to store the matrix
2865 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2866 - -mat_use_hash_table <fact> - set hash table factor
2867 
2868    Level: beginner
2869 
2870    Note:
2871     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2872     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2873 
2874 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2875 M*/
2876 
2877 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2878 
2879 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2880 {
2881   Mat_MPIBAIJ *b;
2882   PetscBool    flg = PETSC_FALSE;
2883 
2884   PetscFunctionBegin;
2885   PetscCall(PetscNew(&b));
2886   B->data      = (void *)b;
2887   B->ops[0]    = MatOps_Values;
2888   B->assembled = PETSC_FALSE;
2889 
2890   B->insertmode = NOT_SET_VALUES;
2891   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2892   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2893 
2894   /* build local table of row and column ownerships */
2895   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2896 
2897   /* build cache for off array entries formed */
2898   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2899 
2900   b->donotstash  = PETSC_FALSE;
2901   b->colmap      = NULL;
2902   b->garray      = NULL;
2903   b->roworiented = PETSC_TRUE;
2904 
2905   /* stuff used in block assembly */
2906   b->barray = NULL;
2907 
2908   /* stuff used for matrix vector multiply */
2909   b->lvec  = NULL;
2910   b->Mvctx = NULL;
2911 
2912   /* stuff for MatGetRow() */
2913   b->rowindices   = NULL;
2914   b->rowvalues    = NULL;
2915   b->getrowactive = PETSC_FALSE;
2916 
2917   /* hash table stuff */
2918   b->ht           = NULL;
2919   b->hd           = NULL;
2920   b->ht_size      = 0;
2921   b->ht_flag      = PETSC_FALSE;
2922   b->ht_fact      = 0;
2923   b->ht_total_ct  = 0;
2924   b->ht_insert_ct = 0;
2925 
2926   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2927   b->ijonly = PETSC_FALSE;
2928 
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2932 #if defined(PETSC_HAVE_HYPRE)
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2934 #endif
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2942   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2943 
2944   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2945   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2946   if (flg) {
2947     PetscReal fact = 1.39;
2948     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2949     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2950     if (fact <= 1.0) fact = 1.39;
2951     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2952     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2953   }
2954   PetscOptionsEnd();
2955   PetscFunctionReturn(PETSC_SUCCESS);
2956 }
2957 
2958 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2959 /*MC
2960    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2961 
2962    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2963    and `MATMPIBAIJ` otherwise.
2964 
2965    Options Database Keys:
2966 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2967 
2968   Level: beginner
2969 
2970 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2971 M*/
2972 
2973 /*@C
2974   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2975   (block compressed row).
2976 
2977   Collective
2978 
2979   Input Parameters:
2980 + B     - the matrix
2981 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2982           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2983 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2984            submatrix  (same for all local rows)
2985 . d_nnz - array containing the number of block nonzeros in the various block rows
2986            of the in diagonal portion of the local (possibly different for each block
2987            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2988            set it even if it is zero.
2989 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2990            submatrix (same for all local rows).
2991 - o_nnz - array containing the number of nonzeros in the various block rows of the
2992            off-diagonal portion of the local submatrix (possibly different for
2993            each block row) or `NULL`.
2994 
2995    If the *_nnz parameter is given then the *_nz parameter is ignored
2996 
2997   Options Database Keys:
2998 + -mat_block_size            - size of the blocks to use
2999 - -mat_use_hash_table <fact> - set hash table factor
3000 
3001   Level: intermediate
3002 
3003   Notes:
3004   For good matrix assembly performance
3005   the user should preallocate the matrix storage by setting the parameters
3006   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3007   performance can be increased by more than a factor of 50.
3008 
3009   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3010   than it must be used on all processors that share the object for that argument.
3011 
3012   Storage Information:
3013   For a square global matrix we define each processor's diagonal portion
3014   to be its local rows and the corresponding columns (a square submatrix);
3015   each processor's off-diagonal portion encompasses the remainder of the
3016   local matrix (a rectangular submatrix).
3017 
3018   The user can specify preallocated storage for the diagonal part of
3019   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3020   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3021   memory allocation.  Likewise, specify preallocated storage for the
3022   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3023 
3024   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3025   the figure below we depict these three local rows and all columns (0-11).
3026 
3027 .vb
3028            0 1 2 3 4 5 6 7 8 9 10 11
3029           --------------------------
3030    row 3  |o o o d d d o o o o  o  o
3031    row 4  |o o o d d d o o o o  o  o
3032    row 5  |o o o d d d o o o o  o  o
3033           --------------------------
3034 .ve
3035 
3036   Thus, any entries in the d locations are stored in the d (diagonal)
3037   submatrix, and any entries in the o locations are stored in the
3038   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3039   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3040 
3041   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3042   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3043   In general, for PDE problems in which most nonzeros are near the diagonal,
3044   one expects `d_nz` >> `o_nz`.
3045 
3046   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3047   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3048   You can also run with the option `-info` and look for messages with the string
3049   malloc in them to see if additional memory allocation was needed.
3050 
3051 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3052 @*/
3053 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3054 {
3055   PetscFunctionBegin;
3056   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3057   PetscValidType(B, 1);
3058   PetscValidLogicalCollectiveInt(B, bs, 2);
3059   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3060   PetscFunctionReturn(PETSC_SUCCESS);
3061 }
3062 
3063 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3064 /*@C
3065   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3066   (block compressed row).
3067 
3068   Collective
3069 
3070   Input Parameters:
3071 + comm  - MPI communicator
3072 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3073           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3074 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3075            This value should be the same as the local size used in creating the
3076            y vector for the matrix-vector product y = Ax.
3077 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3078            This value should be the same as the local size used in creating the
3079            x vector for the matrix-vector product y = Ax.
3080 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3081 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3082 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3083            submatrix  (same for all local rows)
3084 . d_nnz - array containing the number of nonzero blocks in the various block rows
3085            of the in diagonal portion of the local (possibly different for each block
3086            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3087            and set it even if it is zero.
3088 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3089            submatrix (same for all local rows).
3090 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3091            off-diagonal portion of the local submatrix (possibly different for
3092            each block row) or NULL.
3093 
3094   Output Parameter:
3095 . A - the matrix
3096 
3097   Options Database Keys:
3098 + -mat_block_size            - size of the blocks to use
3099 - -mat_use_hash_table <fact> - set hash table factor
3100 
3101   Level: intermediate
3102 
3103   Notes:
3104   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3105   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3106   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3107 
3108   For good matrix assembly performance
3109   the user should preallocate the matrix storage by setting the parameters
3110   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3111   performance can be increased by more than a factor of 50.
3112 
3113   If the *_nnz parameter is given then the *_nz parameter is ignored
3114 
3115   A nonzero block is any block that as 1 or more nonzeros in it
3116 
3117   The user MUST specify either the local or global matrix dimensions
3118   (possibly both).
3119 
3120   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3121   than it must be used on all processors that share the object for that argument.
3122 
3123   Storage Information:
3124   For a square global matrix we define each processor's diagonal portion
3125   to be its local rows and the corresponding columns (a square submatrix);
3126   each processor's off-diagonal portion encompasses the remainder of the
3127   local matrix (a rectangular submatrix).
3128 
3129   The user can specify preallocated storage for the diagonal part of
3130   the local submatrix with either d_nz or d_nnz (not both).  Set
3131   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3132   memory allocation.  Likewise, specify preallocated storage for the
3133   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3134 
3135   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3136   the figure below we depict these three local rows and all columns (0-11).
3137 
3138 .vb
3139            0 1 2 3 4 5 6 7 8 9 10 11
3140           --------------------------
3141    row 3  |o o o d d d o o o o  o  o
3142    row 4  |o o o d d d o o o o  o  o
3143    row 5  |o o o d d d o o o o  o  o
3144           --------------------------
3145 .ve
3146 
3147   Thus, any entries in the d locations are stored in the d (diagonal)
3148   submatrix, and any entries in the o locations are stored in the
3149   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3150   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3151 
3152   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3153   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3154   In general, for PDE problems in which most nonzeros are near the diagonal,
3155   one expects `d_nz` >> `o_nz`.
3156 
3157 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3158 @*/
3159 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3160 {
3161   PetscMPIInt size;
3162 
3163   PetscFunctionBegin;
3164   PetscCall(MatCreate(comm, A));
3165   PetscCall(MatSetSizes(*A, m, n, M, N));
3166   PetscCallMPI(MPI_Comm_size(comm, &size));
3167   if (size > 1) {
3168     PetscCall(MatSetType(*A, MATMPIBAIJ));
3169     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3170   } else {
3171     PetscCall(MatSetType(*A, MATSEQBAIJ));
3172     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3173   }
3174   PetscFunctionReturn(PETSC_SUCCESS);
3175 }
3176 
3177 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3178 {
3179   Mat          mat;
3180   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3181   PetscInt     len = 0;
3182 
3183   PetscFunctionBegin;
3184   *newmat = NULL;
3185   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3186   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3187   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3188 
3189   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3190   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3191   if (matin->hash_active) {
3192     PetscCall(MatSetUp(mat));
3193   } else {
3194     mat->factortype   = matin->factortype;
3195     mat->preallocated = PETSC_TRUE;
3196     mat->assembled    = PETSC_TRUE;
3197     mat->insertmode   = NOT_SET_VALUES;
3198 
3199     a             = (Mat_MPIBAIJ *)mat->data;
3200     mat->rmap->bs = matin->rmap->bs;
3201     a->bs2        = oldmat->bs2;
3202     a->mbs        = oldmat->mbs;
3203     a->nbs        = oldmat->nbs;
3204     a->Mbs        = oldmat->Mbs;
3205     a->Nbs        = oldmat->Nbs;
3206 
3207     a->size         = oldmat->size;
3208     a->rank         = oldmat->rank;
3209     a->donotstash   = oldmat->donotstash;
3210     a->roworiented  = oldmat->roworiented;
3211     a->rowindices   = NULL;
3212     a->rowvalues    = NULL;
3213     a->getrowactive = PETSC_FALSE;
3214     a->barray       = NULL;
3215     a->rstartbs     = oldmat->rstartbs;
3216     a->rendbs       = oldmat->rendbs;
3217     a->cstartbs     = oldmat->cstartbs;
3218     a->cendbs       = oldmat->cendbs;
3219 
3220     /* hash table stuff */
3221     a->ht           = NULL;
3222     a->hd           = NULL;
3223     a->ht_size      = 0;
3224     a->ht_flag      = oldmat->ht_flag;
3225     a->ht_fact      = oldmat->ht_fact;
3226     a->ht_total_ct  = 0;
3227     a->ht_insert_ct = 0;
3228 
3229     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3230     if (oldmat->colmap) {
3231 #if defined(PETSC_USE_CTABLE)
3232       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3233 #else
3234       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3235       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3236 #endif
3237     } else a->colmap = NULL;
3238 
3239     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3240       PetscCall(PetscMalloc1(len, &a->garray));
3241       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3242     } else a->garray = NULL;
3243 
3244     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3245     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3246     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3247 
3248     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3249     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3250   }
3251   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3252   *newmat = mat;
3253   PetscFunctionReturn(PETSC_SUCCESS);
3254 }
3255 
3256 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3257 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3258 {
3259   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3260   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3261   PetscScalar *matvals;
3262 
3263   PetscFunctionBegin;
3264   PetscCall(PetscViewerSetUp(viewer));
3265 
3266   /* read in matrix header */
3267   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3268   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3269   M  = header[1];
3270   N  = header[2];
3271   nz = header[3];
3272   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3273   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3274   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3275 
3276   /* set block sizes from the viewer's .info file */
3277   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3278   /* set local sizes if not set already */
3279   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3280   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3281   /* set global sizes if not set already */
3282   if (mat->rmap->N < 0) mat->rmap->N = M;
3283   if (mat->cmap->N < 0) mat->cmap->N = N;
3284   PetscCall(PetscLayoutSetUp(mat->rmap));
3285   PetscCall(PetscLayoutSetUp(mat->cmap));
3286 
3287   /* check if the matrix sizes are correct */
3288   PetscCall(MatGetSize(mat, &rows, &cols));
3289   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3290   PetscCall(MatGetBlockSize(mat, &bs));
3291   PetscCall(MatGetLocalSize(mat, &m, &n));
3292   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3293   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3294   mbs = m / bs;
3295   nbs = n / bs;
3296 
3297   /* read in row lengths and build row indices */
3298   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3299   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3300   rowidxs[0] = 0;
3301   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3302   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3303   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3304 
3305   /* read in column indices and matrix values */
3306   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3307   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3308   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3309 
3310   {                /* preallocate matrix storage */
3311     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3312     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3313     PetscBool  sbaij, done;
3314     PetscInt  *d_nnz, *o_nnz;
3315 
3316     PetscCall(PetscBTCreate(nbs, &bt));
3317     PetscCall(PetscHSetICreate(&ht));
3318     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3319     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3320     for (i = 0; i < mbs; i++) {
3321       PetscCall(PetscBTMemzero(nbs, bt));
3322       PetscCall(PetscHSetIClear(ht));
3323       for (k = 0; k < bs; k++) {
3324         PetscInt row = bs * i + k;
3325         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3326           PetscInt col = colidxs[j];
3327           if (!sbaij || col >= row) {
3328             if (col >= cs && col < ce) {
3329               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3330             } else {
3331               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3332               if (done) o_nnz[i]++;
3333             }
3334           }
3335         }
3336       }
3337     }
3338     PetscCall(PetscBTDestroy(&bt));
3339     PetscCall(PetscHSetIDestroy(&ht));
3340     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3341     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3342     PetscCall(PetscFree2(d_nnz, o_nnz));
3343   }
3344 
3345   /* store matrix values */
3346   for (i = 0; i < m; i++) {
3347     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3348     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3349   }
3350 
3351   PetscCall(PetscFree(rowidxs));
3352   PetscCall(PetscFree2(colidxs, matvals));
3353   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3354   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3355   PetscFunctionReturn(PETSC_SUCCESS);
3356 }
3357 
3358 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3359 {
3360   PetscBool isbinary;
3361 
3362   PetscFunctionBegin;
3363   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3364   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3365   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3366   PetscFunctionReturn(PETSC_SUCCESS);
3367 }
3368 
3369 /*@
3370   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3371 
3372   Input Parameters:
3373 + mat  - the matrix
3374 - fact - factor
3375 
3376   Options Database Key:
3377 . -mat_use_hash_table <fact> - provide the factor
3378 
3379   Level: advanced
3380 
3381 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3382 @*/
3383 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3384 {
3385   PetscFunctionBegin;
3386   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3387   PetscFunctionReturn(PETSC_SUCCESS);
3388 }
3389 
3390 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3391 {
3392   Mat_MPIBAIJ *baij;
3393 
3394   PetscFunctionBegin;
3395   baij          = (Mat_MPIBAIJ *)mat->data;
3396   baij->ht_fact = fact;
3397   PetscFunctionReturn(PETSC_SUCCESS);
3398 }
3399 
3400 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3401 {
3402   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3403   PetscBool    flg;
3404 
3405   PetscFunctionBegin;
3406   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3407   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3408   if (Ad) *Ad = a->A;
3409   if (Ao) *Ao = a->B;
3410   if (colmap) *colmap = a->garray;
3411   PetscFunctionReturn(PETSC_SUCCESS);
3412 }
3413 
3414 /*
3415     Special version for direct calls from Fortran (to eliminate two function call overheads
3416 */
3417 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3418   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3419 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3420   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3421 #endif
3422 
3423 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3424 /*@C
3425   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3426 
3427   Collective
3428 
3429   Input Parameters:
3430 + matin  - the matrix
3431 . min    - number of input rows
3432 . im     - input rows
3433 . nin    - number of input columns
3434 . in     - input columns
3435 . v      - numerical values input
3436 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3437 
3438   Level: advanced
3439 
3440   Developer Notes:
3441   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3442 
3443 .seealso: `Mat`, `MatSetValuesBlocked()`
3444 @*/
3445 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3446 {
3447   /* convert input arguments to C version */
3448   Mat        mat = *matin;
3449   PetscInt   m = *min, n = *nin;
3450   InsertMode addv = *addvin;
3451 
3452   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3453   const MatScalar *value;
3454   MatScalar       *barray      = baij->barray;
3455   PetscBool        roworiented = baij->roworiented;
3456   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3457   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3458   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3459 
3460   PetscFunctionBegin;
3461   /* tasks normally handled by MatSetValuesBlocked() */
3462   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3463   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3464   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3465   if (mat->assembled) {
3466     mat->was_assembled = PETSC_TRUE;
3467     mat->assembled     = PETSC_FALSE;
3468   }
3469   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3470 
3471   if (!barray) {
3472     PetscCall(PetscMalloc1(bs2, &barray));
3473     baij->barray = barray;
3474   }
3475 
3476   if (roworiented) stepval = (n - 1) * bs;
3477   else stepval = (m - 1) * bs;
3478 
3479   for (i = 0; i < m; i++) {
3480     if (im[i] < 0) continue;
3481     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3482     if (im[i] >= rstart && im[i] < rend) {
3483       row = im[i] - rstart;
3484       for (j = 0; j < n; j++) {
3485         /* If NumCol = 1 then a copy is not required */
3486         if ((roworiented) && (n == 1)) {
3487           barray = (MatScalar *)v + i * bs2;
3488         } else if ((!roworiented) && (m == 1)) {
3489           barray = (MatScalar *)v + j * bs2;
3490         } else { /* Here a copy is required */
3491           if (roworiented) {
3492             value = v + i * (stepval + bs) * bs + j * bs;
3493           } else {
3494             value = v + j * (stepval + bs) * bs + i * bs;
3495           }
3496           for (ii = 0; ii < bs; ii++, value += stepval) {
3497             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3498           }
3499           barray -= bs2;
3500         }
3501 
3502         if (in[j] >= cstart && in[j] < cend) {
3503           col = in[j] - cstart;
3504           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3505         } else if (in[j] < 0) {
3506           continue;
3507         } else {
3508           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3509           if (mat->was_assembled) {
3510             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3511 
3512 #if defined(PETSC_USE_DEBUG)
3513   #if defined(PETSC_USE_CTABLE)
3514             {
3515               PetscInt data;
3516               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3517               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3518             }
3519   #else
3520             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3521   #endif
3522 #endif
3523 #if defined(PETSC_USE_CTABLE)
3524             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3525             col = (col - 1) / bs;
3526 #else
3527             col = (baij->colmap[in[j]] - 1) / bs;
3528 #endif
3529             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3530               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3531               col = in[j];
3532             }
3533           } else col = in[j];
3534           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3535         }
3536       }
3537     } else {
3538       if (!baij->donotstash) {
3539         if (roworiented) {
3540           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3541         } else {
3542           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3543         }
3544       }
3545     }
3546   }
3547 
3548   /* task normally handled by MatSetValuesBlocked() */
3549   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3550   PetscFunctionReturn(PETSC_SUCCESS);
3551 }
3552 
3553 /*@
3554   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3555 
3556   Collective
3557 
3558   Input Parameters:
3559 + comm - MPI communicator
3560 . bs   - the block size, only a block size of 1 is supported
3561 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3562 . n    - This value should be the same as the local size used in creating the
3563          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3564          calculated if `N` is given) For square matrices `n` is almost always `m`.
3565 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3566 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3567 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3568 . j    - column indices
3569 - a    - matrix values
3570 
3571   Output Parameter:
3572 . mat - the matrix
3573 
3574   Level: intermediate
3575 
3576   Notes:
3577   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3578   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3579   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3580 
3581   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3582   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3583   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3584   with column-major ordering within blocks.
3585 
3586   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3587 
3588 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3589           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3590 @*/
3591 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3592 {
3593   PetscFunctionBegin;
3594   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3595   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3596   PetscCall(MatCreate(comm, mat));
3597   PetscCall(MatSetSizes(*mat, m, n, M, N));
3598   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3599   PetscCall(MatSetBlockSize(*mat, bs));
3600   PetscCall(MatSetUp(*mat));
3601   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3602   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3603   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3604   PetscFunctionReturn(PETSC_SUCCESS);
3605 }
3606 
3607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3608 {
3609   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3610   PetscInt    *indx;
3611   PetscScalar *values;
3612 
3613   PetscFunctionBegin;
3614   PetscCall(MatGetSize(inmat, &m, &N));
3615   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3616     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3617     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3618     PetscInt    *bindx, rmax = a->rmax, j;
3619     PetscMPIInt  rank, size;
3620 
3621     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3622     mbs = m / bs;
3623     Nbs = N / cbs;
3624     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3625     nbs = n / cbs;
3626 
3627     PetscCall(PetscMalloc1(rmax, &bindx));
3628     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3629 
3630     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3631     PetscCallMPI(MPI_Comm_rank(comm, &size));
3632     if (rank == size - 1) {
3633       /* Check sum(nbs) = Nbs */
3634       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3635     }
3636 
3637     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3638     for (i = 0; i < mbs; i++) {
3639       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3640       nnz = nnz / bs;
3641       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3642       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3643       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3644     }
3645     PetscCall(PetscFree(bindx));
3646 
3647     PetscCall(MatCreate(comm, outmat));
3648     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3649     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3650     PetscCall(MatSetType(*outmat, MATBAIJ));
3651     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3652     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3653     MatPreallocateEnd(dnz, onz);
3654     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3655   }
3656 
3657   /* numeric phase */
3658   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3659   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3660 
3661   for (i = 0; i < m; i++) {
3662     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3663     Ii = i + rstart;
3664     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3665     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3666   }
3667   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3668   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3669   PetscFunctionReturn(PETSC_SUCCESS);
3670 }
3671