xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5e116b5993518cdad187cbd57637026eea6dda60)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127 {
128   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
131 
132   PetscFunctionBegin;
133 #if defined(PETSC_USE_CTABLE)
134   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136 #else
137   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139 #endif
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
144   do { \
145     brow = row / bs; \
146     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
147     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
148     rmax = aimax[brow]; \
149     nrow = ailen[brow]; \
150     bcol = col / bs; \
151     ridx = row % bs; \
152     cidx = col % bs; \
153     low  = 0; \
154     high = nrow; \
155     while (high - low > 3) { \
156       t = (low + high) / 2; \
157       if (rp[t] > bcol) high = t; \
158       else low = t; \
159     } \
160     for (_i = low; _i < high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap + bs2 * _i + bs * cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value; \
165         else *bap = value; \
166         goto a_noinsert; \
167       } \
168     } \
169     if (a->nonew == 1) goto a_noinsert; \
170     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
171     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
172     N = nrow++ - 1; \
173     /* shift up all the later entries in this row */ \
174     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
175     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
176     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
177     rp[_i]                          = bcol; \
178     ap[bs2 * _i + bs * cidx + ridx] = value; \
179   a_noinsert:; \
180     ailen[brow] = nrow; \
181   } while (0)
182 
183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
184   do { \
185     brow = row / bs; \
186     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
187     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
188     rmax = bimax[brow]; \
189     nrow = bilen[brow]; \
190     bcol = col / bs; \
191     ridx = row % bs; \
192     cidx = col % bs; \
193     low  = 0; \
194     high = nrow; \
195     while (high - low > 3) { \
196       t = (low + high) / 2; \
197       if (rp[t] > bcol) high = t; \
198       else low = t; \
199     } \
200     for (_i = low; _i < high; _i++) { \
201       if (rp[_i] > bcol) break; \
202       if (rp[_i] == bcol) { \
203         bap = ap + bs2 * _i + bs * cidx + ridx; \
204         if (addv == ADD_VALUES) *bap += value; \
205         else *bap = value; \
206         goto b_noinsert; \
207       } \
208     } \
209     if (b->nonew == 1) goto b_noinsert; \
210     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
211     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
212     N = nrow++ - 1; \
213     /* shift up all the later entries in this row */ \
214     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
215     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
216     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
217     rp[_i]                          = bcol; \
218     ap[bs2 * _i + bs * cidx + ridx] = value; \
219   b_noinsert:; \
220     bilen[brow] = nrow; \
221   } while (0)
222 
223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
224 {
225   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
226   MatScalar    value;
227   PetscBool    roworiented = baij->roworiented;
228   PetscInt     i, j, row, col;
229   PetscInt     rstart_orig = mat->rmap->rstart;
230   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
231   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
232 
233   /* Some Variables required in the macro */
234   Mat          A     = baij->A;
235   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
236   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
237   MatScalar   *aa = a->a;
238 
239   Mat          B     = baij->B;
240   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
241   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
242   MatScalar   *ba = b->a;
243 
244   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
245   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
246   MatScalar *ap, *bap;
247 
248   PetscFunctionBegin;
249   for (i = 0; i < m; i++) {
250     if (im[i] < 0) continue;
251     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
252     if (im[i] >= rstart_orig && im[i] < rend_orig) {
253       row = im[i] - rstart_orig;
254       for (j = 0; j < n; j++) {
255         if (in[j] >= cstart_orig && in[j] < cend_orig) {
256           col = in[j] - cstart_orig;
257           if (roworiented) value = v[i * n + j];
258           else value = v[i + j * m];
259           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
260         } else if (in[j] < 0) {
261           continue;
262         } else {
263           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
264           if (mat->was_assembled) {
265             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
266 #if defined(PETSC_USE_CTABLE)
267             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
268             col = col - 1;
269 #else
270             col = baij->colmap[in[j] / bs] - 1;
271 #endif
272             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
273               PetscCall(MatDisAssemble_MPIBAIJ(mat));
274               col = in[j];
275               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
276               B     = baij->B;
277               b     = (Mat_SeqBAIJ *)(B)->data;
278               bimax = b->imax;
279               bi    = b->i;
280               bilen = b->ilen;
281               bj    = b->j;
282               ba    = b->a;
283             } else {
284               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
285               col += in[j] % bs;
286             }
287           } else col = in[j];
288           if (roworiented) value = v[i * n + j];
289           else value = v[i + j * m];
290           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
291           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
292         }
293       }
294     } else {
295       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
296       if (!baij->donotstash) {
297         mat->assembled = PETSC_FALSE;
298         if (roworiented) {
299           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
300         } else {
301           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
302         }
303       }
304     }
305   }
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
310 {
311   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
312   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
313   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
314   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
315   PetscBool          roworiented = a->roworiented;
316   const PetscScalar *value       = v;
317   MatScalar         *ap, *aa = a->a, *bap;
318 
319   PetscFunctionBegin;
320   rp    = aj + ai[row];
321   ap    = aa + bs2 * ai[row];
322   rmax  = imax[row];
323   nrow  = ailen[row];
324   value = v;
325   low   = 0;
326   high  = nrow;
327   while (high - low > 7) {
328     t = (low + high) / 2;
329     if (rp[t] > col) high = t;
330     else low = t;
331   }
332   for (i = low; i < high; i++) {
333     if (rp[i] > col) break;
334     if (rp[i] == col) {
335       bap = ap + bs2 * i;
336       if (roworiented) {
337         if (is == ADD_VALUES) {
338           for (ii = 0; ii < bs; ii++) {
339             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
340           }
341         } else {
342           for (ii = 0; ii < bs; ii++) {
343             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
344           }
345         }
346       } else {
347         if (is == ADD_VALUES) {
348           for (ii = 0; ii < bs; ii++, value += bs) {
349             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
350             bap += bs;
351           }
352         } else {
353           for (ii = 0; ii < bs; ii++, value += bs) {
354             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
355             bap += bs;
356           }
357         }
358       }
359       goto noinsert2;
360     }
361   }
362   if (nonew == 1) goto noinsert2;
363   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
364   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
365   N = nrow++ - 1;
366   high++;
367   /* shift up all the later entries in this row */
368   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
369   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
370   rp[i] = col;
371   bap   = ap + bs2 * i;
372   if (roworiented) {
373     for (ii = 0; ii < bs; ii++) {
374       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
375     }
376   } else {
377     for (ii = 0; ii < bs; ii++) {
378       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
379     }
380   }
381 noinsert2:;
382   ailen[row] = nrow;
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 /*
387     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389 */
390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
391 {
392   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
393   const PetscScalar *value;
394   MatScalar         *barray      = baij->barray;
395   PetscBool          roworiented = baij->roworiented;
396   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
397   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
398   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
399 
400   PetscFunctionBegin;
401   if (!barray) {
402     PetscCall(PetscMalloc1(bs2, &barray));
403     baij->barray = barray;
404   }
405 
406   if (roworiented) stepval = (n - 1) * bs;
407   else stepval = (m - 1) * bs;
408 
409   for (i = 0; i < m; i++) {
410     if (im[i] < 0) continue;
411     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
412     if (im[i] >= rstart && im[i] < rend) {
413       row = im[i] - rstart;
414       for (j = 0; j < n; j++) {
415         /* If NumCol = 1 then a copy is not required */
416         if ((roworiented) && (n == 1)) {
417           barray = (MatScalar *)v + i * bs2;
418         } else if ((!roworiented) && (m == 1)) {
419           barray = (MatScalar *)v + j * bs2;
420         } else { /* Here a copy is required */
421           if (roworiented) {
422             value = v + (i * (stepval + bs) + j) * bs;
423           } else {
424             value = v + (j * (stepval + bs) + i) * bs;
425           }
426           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
427             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
428             barray += bs;
429           }
430           barray -= bs2;
431         }
432 
433         if (in[j] >= cstart && in[j] < cend) {
434           col = in[j] - cstart;
435           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
436         } else if (in[j] < 0) {
437           continue;
438         } else {
439           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
440           if (mat->was_assembled) {
441             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
442 
443 #if defined(PETSC_USE_DEBUG)
444   #if defined(PETSC_USE_CTABLE)
445             {
446               PetscInt data;
447               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
448               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
449             }
450   #else
451             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
452   #endif
453 #endif
454 #if defined(PETSC_USE_CTABLE)
455             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
456             col = (col - 1) / bs;
457 #else
458             col = (baij->colmap[in[j]] - 1) / bs;
459 #endif
460             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
461               PetscCall(MatDisAssemble_MPIBAIJ(mat));
462               col = in[j];
463             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
464           } else col = in[j];
465           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
466         }
467       }
468     } else {
469       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
470       if (!baij->donotstash) {
471         if (roworiented) {
472           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
473         } else {
474           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(PETSC_SUCCESS);
480 }
481 
482 #define HASH_KEY             0.6180339887
483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
487 {
488   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
489   PetscBool    roworiented = baij->roworiented;
490   PetscInt     i, j, row, col;
491   PetscInt     rstart_orig = mat->rmap->rstart;
492   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
493   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
494   PetscReal    tmp;
495   MatScalar  **HD       = baij->hd, value;
496   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
497 
498   PetscFunctionBegin;
499   for (i = 0; i < m; i++) {
500     if (PetscDefined(USE_DEBUG)) {
501       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
502       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
503     }
504     row = im[i];
505     if (row >= rstart_orig && row < rend_orig) {
506       for (j = 0; j < n; j++) {
507         col = in[j];
508         if (roworiented) value = v[i * n + j];
509         else value = v[i + j * m];
510         /* Look up PetscInto the Hash Table */
511         key = (row / bs) * Nbs + (col / bs) + 1;
512         h1  = HASH(size, key, tmp);
513 
514         idx = h1;
515         if (PetscDefined(USE_DEBUG)) {
516           insert_ct++;
517           total_ct++;
518           if (HT[idx] != key) {
519             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
520               ;
521             if (idx == size) {
522               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
523                 ;
524               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
525             }
526           }
527         } else if (HT[idx] != key) {
528           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
529             ;
530           if (idx == size) {
531             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
532               ;
533             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
534           }
535         }
536         /* A HASH table entry is found, so insert the values at the correct address */
537         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
538         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
539       }
540     } else if (!baij->donotstash) {
541       if (roworiented) {
542         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
543       } else {
544         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
545       }
546     }
547   }
548   if (PetscDefined(USE_DEBUG)) {
549     baij->ht_total_ct += total_ct;
550     baij->ht_insert_ct += insert_ct;
551   }
552   PetscFunctionReturn(PETSC_SUCCESS);
553 }
554 
555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
556 {
557   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
558   PetscBool          roworiented = baij->roworiented;
559   PetscInt           i, j, ii, jj, row, col;
560   PetscInt           rstart = baij->rstartbs;
561   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
562   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
563   PetscReal          tmp;
564   MatScalar        **HD = baij->hd, *baij_a;
565   const PetscScalar *v_t, *value;
566   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
567 
568   PetscFunctionBegin;
569   if (roworiented) stepval = (n - 1) * bs;
570   else stepval = (m - 1) * bs;
571 
572   for (i = 0; i < m; i++) {
573     if (PetscDefined(USE_DEBUG)) {
574       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
575       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
576     }
577     row = im[i];
578     v_t = v + i * nbs2;
579     if (row >= rstart && row < rend) {
580       for (j = 0; j < n; j++) {
581         col = in[j];
582 
583         /* Look up into the Hash Table */
584         key = row * Nbs + col + 1;
585         h1  = HASH(size, key, tmp);
586 
587         idx = h1;
588         if (PetscDefined(USE_DEBUG)) {
589           total_ct++;
590           insert_ct++;
591           if (HT[idx] != key) {
592             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
593               ;
594             if (idx == size) {
595               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
596                 ;
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
602             ;
603           if (idx == size) {
604             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
605               ;
606             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
607           }
608         }
609         baij_a = HD[idx];
610         if (roworiented) {
611           /*value = v + i*(stepval+bs)*bs + j*bs;*/
612           /* value = v + (i*(stepval+bs)+j)*bs; */
613           value = v_t;
614           v_t += bs;
615           if (addv == ADD_VALUES) {
616             for (ii = 0; ii < bs; ii++, value += stepval) {
617               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
618             }
619           } else {
620             for (ii = 0; ii < bs; ii++, value += stepval) {
621               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
622             }
623           }
624         } else {
625           value = v + j * (stepval + bs) * bs + i * bs;
626           if (addv == ADD_VALUES) {
627             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
628               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
629             }
630           } else {
631             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
632               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
633             }
634           }
635         }
636       }
637     } else {
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   if (PetscDefined(USE_DEBUG)) {
648     baij->ht_total_ct += total_ct;
649     baij->ht_insert_ct += insert_ct;
650   }
651   PetscFunctionReturn(PETSC_SUCCESS);
652 }
653 
654 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
655 {
656   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
657   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
658   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
659 
660   PetscFunctionBegin;
661   for (i = 0; i < m; i++) {
662     if (idxm[i] < 0) continue; /* negative row */
663     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
664     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
665       row = idxm[i] - bsrstart;
666       for (j = 0; j < n; j++) {
667         if (idxn[j] < 0) continue; /* negative column */
668         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
669         if (idxn[j] >= bscstart && idxn[j] < bscend) {
670           col = idxn[j] - bscstart;
671           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
672         } else {
673           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
674 #if defined(PETSC_USE_CTABLE)
675           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
676           data--;
677 #else
678           data = baij->colmap[idxn[j] / bs] - 1;
679 #endif
680           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
681           else {
682             col = data + idxn[j] % bs;
683             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
684           }
685         }
686       }
687     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
688   }
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
692 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
693 {
694   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
695   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
696   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
697   PetscReal    sum = 0.0;
698   MatScalar   *v;
699 
700   PetscFunctionBegin;
701   if (baij->size == 1) {
702     PetscCall(MatNorm(baij->A, type, nrm));
703   } else {
704     if (type == NORM_FROBENIUS) {
705       v  = amat->a;
706       nz = amat->nz * bs2;
707       for (i = 0; i < nz; i++) {
708         sum += PetscRealPart(PetscConj(*v) * (*v));
709         v++;
710       }
711       v  = bmat->a;
712       nz = bmat->nz * bs2;
713       for (i = 0; i < nz; i++) {
714         sum += PetscRealPart(PetscConj(*v) * (*v));
715         v++;
716       }
717       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
718       *nrm = PetscSqrtReal(*nrm);
719     } else if (type == NORM_1) { /* max column sum */
720       PetscReal *tmp, *tmp2;
721       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
722       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           col = bs * (cstart + *jj) + j; /* column index */
729           for (row = 0; row < bs; row++) {
730             tmp[col] += PetscAbsScalar(*v);
731             v++;
732           }
733         }
734         jj++;
735       }
736       v  = bmat->a;
737       jj = bmat->j;
738       for (i = 0; i < bmat->nz; i++) {
739         for (j = 0; j < bs; j++) {
740           col = bs * garray[*jj] + j;
741           for (row = 0; row < bs; row++) {
742             tmp[col] += PetscAbsScalar(*v);
743             v++;
744           }
745         }
746         jj++;
747       }
748       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
749       *nrm = 0.0;
750       for (j = 0; j < mat->cmap->N; j++) {
751         if (tmp2[j] > *nrm) *nrm = tmp2[j];
752       }
753       PetscCall(PetscFree(tmp));
754       PetscCall(PetscFree(tmp2));
755     } else if (type == NORM_INFINITY) { /* max row sum */
756       PetscReal *sums;
757       PetscCall(PetscMalloc1(bs, &sums));
758       sum = 0.0;
759       for (j = 0; j < amat->mbs; j++) {
760         for (row = 0; row < bs; row++) sums[row] = 0.0;
761         v  = amat->a + bs2 * amat->i[j];
762         nz = amat->i[j + 1] - amat->i[j];
763         for (i = 0; i < nz; i++) {
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               sums[row] += PetscAbsScalar(*v);
767               v++;
768             }
769           }
770         }
771         v  = bmat->a + bs2 * bmat->i[j];
772         nz = bmat->i[j + 1] - bmat->i[j];
773         for (i = 0; i < nz; i++) {
774           for (col = 0; col < bs; col++) {
775             for (row = 0; row < bs; row++) {
776               sums[row] += PetscAbsScalar(*v);
777               v++;
778             }
779           }
780         }
781         for (row = 0; row < bs; row++) {
782           if (sums[row] > sum) sum = sums[row];
783         }
784       }
785       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786       PetscCall(PetscFree(sums));
787     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788   }
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entried need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901 {
902   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
904   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
905   PetscInt    *row, *col;
906   PetscBool    r1, r2, r3, other_disassembled;
907   MatScalar   *val;
908   PetscMPIInt  n;
909 
910   PetscFunctionBegin;
911   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912   if (!baij->donotstash && !mat->nooffprocentries) {
913     while (1) {
914       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915       if (!flg) break;
916 
917       for (i = 0; i < n;) {
918         /* Now identify the consecutive vals belonging to the same row */
919         for (j = i, rstart = row[j]; j < n; j++) {
920           if (row[j] != rstart) break;
921         }
922         if (j < n) ncols = j - i;
923         else ncols = n - i;
924         /* Now assemble all these values with a single function call */
925         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926         i = j;
927       }
928     }
929     PetscCall(MatStashScatterEnd_Private(&mat->stash));
930     /* Now process the block-stash. Since the values are stashed column-oriented,
931        set the row-oriented flag to column-oriented, and after MatSetValues()
932        restore the original flags */
933     r1 = baij->roworiented;
934     r2 = a->roworiented;
935     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
936 
937     baij->roworiented                             = PETSC_FALSE;
938     a->roworiented                                = PETSC_FALSE;
939     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE;
940     while (1) {
941       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
942       if (!flg) break;
943 
944       for (i = 0; i < n;) {
945         /* Now identify the consecutive vals belonging to the same row */
946         for (j = i, rstart = row[j]; j < n; j++) {
947           if (row[j] != rstart) break;
948         }
949         if (j < n) ncols = j - i;
950         else ncols = n - i;
951         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
952         i = j;
953       }
954     }
955     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
956 
957     baij->roworiented                           = r1;
958     a->roworiented                              = r2;
959     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
960   }
961 
962   PetscCall(MatAssemblyBegin(baij->A, mode));
963   PetscCall(MatAssemblyEnd(baij->A, mode));
964 
965   /* determine if any processor has disassembled, if so we must
966      also disassemble ourselves, in order that we may reassemble. */
967   /*
968      if nonzero structure of submatrix B cannot change then we know that
969      no processor disassembled thus we can skip this stuff
970   */
971   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
972     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
973     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
974   }
975 
976   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
977   PetscCall(MatAssemblyBegin(baij->B, mode));
978   PetscCall(MatAssemblyEnd(baij->B, mode));
979 
980 #if defined(PETSC_USE_INFO)
981   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
982     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
983 
984     baij->ht_total_ct  = 0;
985     baij->ht_insert_ct = 0;
986   }
987 #endif
988   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
989     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
990 
991     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
992     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
993   }
994 
995   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
996 
997   baij->rowvalues = NULL;
998 
999   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1000   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
1001     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1002     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1003   }
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1008 #include <petscdraw.h>
1009 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1010 {
1011   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1012   PetscMPIInt       rank = baij->rank;
1013   PetscInt          bs   = mat->rmap->bs;
1014   PetscBool         iascii, isdraw;
1015   PetscViewer       sviewer;
1016   PetscViewerFormat format;
1017 
1018   PetscFunctionBegin;
1019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1021   if (iascii) {
1022     PetscCall(PetscViewerGetFormat(viewer, &format));
1023     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1024       MatInfo info;
1025       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1026       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1027       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1028       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1029                                                    mat->rmap->bs, (double)info.memory));
1030       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1031       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1032       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1033       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1034       PetscCall(PetscViewerFlush(viewer));
1035       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1036       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1037       PetscCall(VecScatterView(baij->Mvctx, viewer));
1038       PetscFunctionReturn(PETSC_SUCCESS);
1039     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1040       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1041       PetscFunctionReturn(PETSC_SUCCESS);
1042     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1043       PetscFunctionReturn(PETSC_SUCCESS);
1044     }
1045   }
1046 
1047   if (isdraw) {
1048     PetscDraw draw;
1049     PetscBool isnull;
1050     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1051     PetscCall(PetscDrawIsNull(draw, &isnull));
1052     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1053   }
1054 
1055   {
1056     /* assemble the entire matrix onto first processor. */
1057     Mat          A;
1058     Mat_SeqBAIJ *Aloc;
1059     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1060     MatScalar   *a;
1061     const char  *matname;
1062 
1063     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1064     /* Perhaps this should be the type of mat? */
1065     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1066     if (rank == 0) {
1067       PetscCall(MatSetSizes(A, M, N, M, N));
1068     } else {
1069       PetscCall(MatSetSizes(A, 0, 0, M, N));
1070     }
1071     PetscCall(MatSetType(A, MATMPIBAIJ));
1072     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1073     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1074 
1075     /* copy over the A part */
1076     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1077     ai   = Aloc->i;
1078     aj   = Aloc->j;
1079     a    = Aloc->a;
1080     PetscCall(PetscMalloc1(bs, &rvals));
1081 
1082     for (i = 0; i < mbs; i++) {
1083       rvals[0] = bs * (baij->rstartbs + i);
1084       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1085       for (j = ai[i]; j < ai[i + 1]; j++) {
1086         col = (baij->cstartbs + aj[j]) * bs;
1087         for (k = 0; k < bs; k++) {
1088           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1089           col++;
1090           a += bs;
1091         }
1092       }
1093     }
1094     /* copy over the B part */
1095     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1096     ai   = Aloc->i;
1097     aj   = Aloc->j;
1098     a    = Aloc->a;
1099     for (i = 0; i < mbs; i++) {
1100       rvals[0] = bs * (baij->rstartbs + i);
1101       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1102       for (j = ai[i]; j < ai[i + 1]; j++) {
1103         col = baij->garray[aj[j]] * bs;
1104         for (k = 0; k < bs; k++) {
1105           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1106           col++;
1107           a += bs;
1108         }
1109       }
1110     }
1111     PetscCall(PetscFree(rvals));
1112     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1113     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1114     /*
1115        Everyone has to call to draw the matrix since the graphics waits are
1116        synchronized across all processors that share the PetscDraw object
1117     */
1118     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1119     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1120     if (rank == 0) {
1121       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
1122       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
1123     }
1124     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1125     PetscCall(MatDestroy(&A));
1126   }
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1131 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1132 {
1133   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1134   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1135   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1136   const PetscInt *garray = aij->garray;
1137   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1138   PetscInt64      nz, hnz;
1139   PetscInt       *rowlens, *colidxs;
1140   PetscScalar    *matvals;
1141   PetscMPIInt     rank;
1142 
1143   PetscFunctionBegin;
1144   PetscCall(PetscViewerSetUp(viewer));
1145 
1146   M  = mat->rmap->N;
1147   N  = mat->cmap->N;
1148   m  = mat->rmap->n;
1149   rs = mat->rmap->rstart;
1150   cs = mat->cmap->rstart;
1151   bs = mat->rmap->bs;
1152   nz = bs * bs * (A->nz + B->nz);
1153 
1154   /* write matrix header */
1155   header[0] = MAT_FILE_CLASSID;
1156   header[1] = M;
1157   header[2] = N;
1158   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1159   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1160   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1161   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1162 
1163   /* fill in and store row lengths */
1164   PetscCall(PetscMalloc1(m, &rowlens));
1165   for (cnt = 0, i = 0; i < A->mbs; i++)
1166     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1167   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1168   PetscCall(PetscFree(rowlens));
1169 
1170   /* fill in and store column indices */
1171   PetscCall(PetscMalloc1(nz, &colidxs));
1172   for (cnt = 0, i = 0; i < A->mbs; i++) {
1173     for (k = 0; k < bs; k++) {
1174       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1175         if (garray[B->j[jb]] > cs / bs) break;
1176         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1177       }
1178       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1179         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1180       for (; jb < B->i[i + 1]; jb++)
1181         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1182     }
1183   }
1184   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1185   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1186   PetscCall(PetscFree(colidxs));
1187 
1188   /* fill in and store nonzero values */
1189   PetscCall(PetscMalloc1(nz, &matvals));
1190   for (cnt = 0, i = 0; i < A->mbs; i++) {
1191     for (k = 0; k < bs; k++) {
1192       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1193         if (garray[B->j[jb]] > cs / bs) break;
1194         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1195       }
1196       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1197         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1198       for (; jb < B->i[i + 1]; jb++)
1199         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1200     }
1201   }
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1203   PetscCall(PetscFree(matvals));
1204 
1205   /* write block size option to the viewer's .info file */
1206   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1207   PetscFunctionReturn(PETSC_SUCCESS);
1208 }
1209 
1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1211 {
1212   PetscBool iascii, isdraw, issocket, isbinary;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1219   if (iascii || isdraw || issocket) {
1220     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1221   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1222   PetscFunctionReturn(PETSC_SUCCESS);
1223 }
1224 
1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1226 {
1227   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1228   PetscInt     nt;
1229 
1230   PetscFunctionBegin;
1231   PetscCall(VecGetLocalSize(xx, &nt));
1232   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1233   PetscCall(VecGetLocalSize(yy, &nt));
1234   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1235   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1236   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1237   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1238   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1243 {
1244   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1249   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1251   PetscFunctionReturn(PETSC_SUCCESS);
1252 }
1253 
1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1257 
1258   PetscFunctionBegin;
1259   /* do nondiagonal part */
1260   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1261   /* do local part */
1262   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1263   /* add partial results together */
1264   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1265   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1270 {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272 
1273   PetscFunctionBegin;
1274   /* do nondiagonal part */
1275   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1276   /* do local part */
1277   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1278   /* add partial results together */
1279   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1280   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 /*
1285   This only works correctly for square matrices where the subblock A->A is the
1286    diagonal block
1287 */
1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1289 {
1290   PetscFunctionBegin;
1291   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1292   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1297 {
1298   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(MatScale(a->A, aa));
1302   PetscCall(MatScale(a->B, aa));
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1307 {
1308   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1309   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1310   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1311   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1312   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1313 
1314   PetscFunctionBegin;
1315   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1316   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1317   mat->getrowactive = PETSC_TRUE;
1318 
1319   if (!mat->rowvalues && (idx || v)) {
1320     /*
1321         allocate enough space to hold information from the longest row.
1322     */
1323     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1324     PetscInt     max = 1, mbs = mat->mbs, tmp;
1325     for (i = 0; i < mbs; i++) {
1326       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1327       if (max < tmp) max = tmp;
1328     }
1329     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1330   }
1331   lrow = row - brstart;
1332 
1333   pvA = &vworkA;
1334   pcA = &cworkA;
1335   pvB = &vworkB;
1336   pcB = &cworkB;
1337   if (!v) {
1338     pvA = NULL;
1339     pvB = NULL;
1340   }
1341   if (!idx) {
1342     pcA = NULL;
1343     if (!v) pcB = NULL;
1344   }
1345   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1346   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1347   nztot = nzA + nzB;
1348 
1349   cmap = mat->garray;
1350   if (v || idx) {
1351     if (nztot) {
1352       /* Sort by increasing column numbers, assuming A and B already sorted */
1353       PetscInt imark = -1;
1354       if (v) {
1355         *v = v_p = mat->rowvalues;
1356         for (i = 0; i < nzB; i++) {
1357           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1358           else break;
1359         }
1360         imark = i;
1361         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1362         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1363       }
1364       if (idx) {
1365         *idx = idx_p = mat->rowindices;
1366         if (imark > -1) {
1367           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1368         } else {
1369           for (i = 0; i < nzB; i++) {
1370             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1371             else break;
1372           }
1373           imark = i;
1374         }
1375         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1376         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1377       }
1378     } else {
1379       if (idx) *idx = NULL;
1380       if (v) *v = NULL;
1381     }
1382   }
1383   *nz = nztot;
1384   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1385   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1390 {
1391   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1395   baij->getrowactive = PETSC_FALSE;
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1400 {
1401   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1402 
1403   PetscFunctionBegin;
1404   PetscCall(MatZeroEntries(l->A));
1405   PetscCall(MatZeroEntries(l->B));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1410 {
1411   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1412   Mat            A = a->A, B = a->B;
1413   PetscLogDouble isend[5], irecv[5];
1414 
1415   PetscFunctionBegin;
1416   info->block_size = (PetscReal)matin->rmap->bs;
1417 
1418   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1419 
1420   isend[0] = info->nz_used;
1421   isend[1] = info->nz_allocated;
1422   isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;
1424   isend[4] = info->mallocs;
1425 
1426   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1427 
1428   isend[0] += info->nz_used;
1429   isend[1] += info->nz_allocated;
1430   isend[2] += info->nz_unneeded;
1431   isend[3] += info->memory;
1432   isend[4] += info->mallocs;
1433 
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else if (flag == MAT_GLOBAL_SUM) {
1449     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1450 
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
1463 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1464 {
1465   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1466 
1467   PetscFunctionBegin;
1468   switch (op) {
1469   case MAT_NEW_NONZERO_LOCATIONS:
1470   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1471   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1472   case MAT_KEEP_NONZERO_PATTERN:
1473   case MAT_NEW_NONZERO_LOCATION_ERR:
1474     MatCheckPreallocated(A, 1);
1475     PetscCall(MatSetOption(a->A, op, flg));
1476     PetscCall(MatSetOption(a->B, op, flg));
1477     break;
1478   case MAT_ROW_ORIENTED:
1479     MatCheckPreallocated(A, 1);
1480     a->roworiented = flg;
1481 
1482     PetscCall(MatSetOption(a->A, op, flg));
1483     PetscCall(MatSetOption(a->B, op, flg));
1484     break;
1485   case MAT_FORCE_DIAGONAL_ENTRIES:
1486   case MAT_SORTED_FULL:
1487     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1488     break;
1489   case MAT_IGNORE_OFF_PROC_ENTRIES:
1490     a->donotstash = flg;
1491     break;
1492   case MAT_USE_HASH_TABLE:
1493     a->ht_flag = flg;
1494     a->ht_fact = 1.39;
1495     break;
1496   case MAT_SYMMETRIC:
1497   case MAT_STRUCTURALLY_SYMMETRIC:
1498   case MAT_HERMITIAN:
1499   case MAT_SUBMAT_SINGLEIS:
1500   case MAT_SYMMETRY_ETERNAL:
1501   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1502   case MAT_SPD_ETERNAL:
1503     /* if the diagonal matrix is square it inherits some of the properties above */
1504     break;
1505   default:
1506     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1507   }
1508   PetscFunctionReturn(PETSC_SUCCESS);
1509 }
1510 
1511 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1512 {
1513   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1514   Mat_SeqBAIJ *Aloc;
1515   Mat          B;
1516   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1517   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1518   MatScalar   *a;
1519 
1520   PetscFunctionBegin;
1521   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1522   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1523     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1524     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1525     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1526     /* Do not know preallocation information, but must set block size */
1527     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1528   } else {
1529     B = *matout;
1530   }
1531 
1532   /* copy over the A part */
1533   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1534   ai   = Aloc->i;
1535   aj   = Aloc->j;
1536   a    = Aloc->a;
1537   PetscCall(PetscMalloc1(bs, &rvals));
1538 
1539   for (i = 0; i < mbs; i++) {
1540     rvals[0] = bs * (baij->rstartbs + i);
1541     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1542     for (j = ai[i]; j < ai[i + 1]; j++) {
1543       col = (baij->cstartbs + aj[j]) * bs;
1544       for (k = 0; k < bs; k++) {
1545         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1546 
1547         col++;
1548         a += bs;
1549       }
1550     }
1551   }
1552   /* copy over the B part */
1553   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1554   ai   = Aloc->i;
1555   aj   = Aloc->j;
1556   a    = Aloc->a;
1557   for (i = 0; i < mbs; i++) {
1558     rvals[0] = bs * (baij->rstartbs + i);
1559     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1560     for (j = ai[i]; j < ai[i + 1]; j++) {
1561       col = baij->garray[aj[j]] * bs;
1562       for (k = 0; k < bs; k++) {
1563         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1564         col++;
1565         a += bs;
1566       }
1567     }
1568   }
1569   PetscCall(PetscFree(rvals));
1570   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1572 
1573   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1574   else PetscCall(MatHeaderMerge(A, &B));
1575   PetscFunctionReturn(PETSC_SUCCESS);
1576 }
1577 
1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1579 {
1580   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1581   Mat          a = baij->A, b = baij->B;
1582   PetscInt     s1, s2, s3;
1583 
1584   PetscFunctionBegin;
1585   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1586   if (rr) {
1587     PetscCall(VecGetLocalSize(rr, &s1));
1588     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1589     /* Overlap communication with computation. */
1590     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1591   }
1592   if (ll) {
1593     PetscCall(VecGetLocalSize(ll, &s1));
1594     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1595     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596   }
1597   /* scale  the diagonal block */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   if (rr) {
1601     /* Do a scatter end and then right scale the off-diagonal block */
1602     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1603     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1604   }
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1609 {
1610   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1611   PetscInt    *lrows;
1612   PetscInt     r, len;
1613   PetscBool    cong;
1614 
1615   PetscFunctionBegin;
1616   /* get locally owned rows */
1617   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1618   /* fix right hand side if needed */
1619   if (x && b) {
1620     const PetscScalar *xx;
1621     PetscScalar       *bb;
1622 
1623     PetscCall(VecGetArrayRead(x, &xx));
1624     PetscCall(VecGetArray(b, &bb));
1625     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1626     PetscCall(VecRestoreArrayRead(x, &xx));
1627     PetscCall(VecRestoreArray(b, &bb));
1628   }
1629 
1630   /* actually zap the local rows */
1631   /*
1632         Zero the required rows. If the "diagonal block" of the matrix
1633      is square and the user wishes to set the diagonal we use separate
1634      code so that MatSetValues() is not called for each diagonal allocating
1635      new memory, thus calling lots of mallocs and slowing things down.
1636 
1637   */
1638   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1639   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1640   PetscCall(MatHasCongruentLayouts(A, &cong));
1641   if ((diag != 0.0) && cong) {
1642     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1643   } else if (diag != 0.0) {
1644     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1645     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1646        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1647     for (r = 0; r < len; ++r) {
1648       const PetscInt row = lrows[r] + A->rmap->rstart;
1649       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1650     }
1651     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1652     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1653   } else {
1654     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1655   }
1656   PetscCall(PetscFree(lrows));
1657 
1658   /* only change matrix nonzero state if pattern was allowed to be changed */
1659   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
1660     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1661     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1662   }
1663   PetscFunctionReturn(PETSC_SUCCESS);
1664 }
1665 
1666 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1667 {
1668   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1669   PetscMPIInt        n = A->rmap->n, p = 0;
1670   PetscInt           i, j, k, r, len = 0, row, col, count;
1671   PetscInt          *lrows, *owners = A->rmap->range;
1672   PetscSFNode       *rrows;
1673   PetscSF            sf;
1674   const PetscScalar *xx;
1675   PetscScalar       *bb, *mask;
1676   Vec                xmask, lmask;
1677   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1678   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1679   PetscScalar       *aa;
1680 
1681   PetscFunctionBegin;
1682   /* Create SF where leaves are input rows and roots are owned rows */
1683   PetscCall(PetscMalloc1(n, &lrows));
1684   for (r = 0; r < n; ++r) lrows[r] = -1;
1685   PetscCall(PetscMalloc1(N, &rrows));
1686   for (r = 0; r < N; ++r) {
1687     const PetscInt idx = rows[r];
1688     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1689     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1690       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1691     }
1692     rrows[r].rank  = p;
1693     rrows[r].index = rows[r] - owners[p];
1694   }
1695   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1696   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1697   /* Collect flags for rows to be zeroed */
1698   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1699   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1700   PetscCall(PetscSFDestroy(&sf));
1701   /* Compress and put in row numbers */
1702   for (r = 0; r < n; ++r)
1703     if (lrows[r] >= 0) lrows[len++] = r;
1704   /* zero diagonal part of matrix */
1705   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1706   /* handle off-diagonal part of matrix */
1707   PetscCall(MatCreateVecs(A, &xmask, NULL));
1708   PetscCall(VecDuplicate(l->lvec, &lmask));
1709   PetscCall(VecGetArray(xmask, &bb));
1710   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1711   PetscCall(VecRestoreArray(xmask, &bb));
1712   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1713   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1714   PetscCall(VecDestroy(&xmask));
1715   if (x) {
1716     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1717     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1718     PetscCall(VecGetArrayRead(l->lvec, &xx));
1719     PetscCall(VecGetArray(b, &bb));
1720   }
1721   PetscCall(VecGetArray(lmask, &mask));
1722   /* remove zeroed rows of off-diagonal matrix */
1723   for (i = 0; i < len; ++i) {
1724     row   = lrows[i];
1725     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1726     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
1727     for (k = 0; k < count; ++k) {
1728       aa[0] = 0.0;
1729       aa += bs;
1730     }
1731   }
1732   /* loop over all elements of off process part of matrix zeroing removed columns*/
1733   for (i = 0; i < l->B->rmap->N; ++i) {
1734     row = i / bs;
1735     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1736       for (k = 0; k < bs; ++k) {
1737         col = bs * baij->j[j] + k;
1738         if (PetscAbsScalar(mask[col])) {
1739           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
1740           if (x) bb[i] -= aa[0] * xx[col];
1741           aa[0] = 0.0;
1742         }
1743       }
1744     }
1745   }
1746   if (x) {
1747     PetscCall(VecRestoreArray(b, &bb));
1748     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1749   }
1750   PetscCall(VecRestoreArray(lmask, &mask));
1751   PetscCall(VecDestroy(&lmask));
1752   PetscCall(PetscFree(lrows));
1753 
1754   /* only change matrix nonzero state if pattern was allowed to be changed */
1755   if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) {
1756     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1757     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1758   }
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1763 {
1764   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1765 
1766   PetscFunctionBegin;
1767   PetscCall(MatSetUnfactored(a->A));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1772 
1773 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1774 {
1775   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1776   Mat          a, b, c, d;
1777   PetscBool    flg;
1778 
1779   PetscFunctionBegin;
1780   a = matA->A;
1781   b = matA->B;
1782   c = matB->A;
1783   d = matB->B;
1784 
1785   PetscCall(MatEqual(a, c, &flg));
1786   if (flg) PetscCall(MatEqual(b, d, &flg));
1787   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1788   PetscFunctionReturn(PETSC_SUCCESS);
1789 }
1790 
1791 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1792 {
1793   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1794   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1795 
1796   PetscFunctionBegin;
1797   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1798   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1799     PetscCall(MatCopy_Basic(A, B, str));
1800   } else {
1801     PetscCall(MatCopy(a->A, b->A, str));
1802     PetscCall(MatCopy(a->B, b->B, str));
1803   }
1804   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
1808 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1809 {
1810   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1811   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1812   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1813 
1814   PetscFunctionBegin;
1815   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1816   PetscFunctionReturn(PETSC_SUCCESS);
1817 }
1818 
1819 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1820 {
1821   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1822   PetscBLASInt bnz, one                         = 1;
1823   Mat_SeqBAIJ *x, *y;
1824   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1825 
1826   PetscFunctionBegin;
1827   if (str == SAME_NONZERO_PATTERN) {
1828     PetscScalar alpha = a;
1829     x                 = (Mat_SeqBAIJ *)xx->A->data;
1830     y                 = (Mat_SeqBAIJ *)yy->A->data;
1831     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1832     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1833     x = (Mat_SeqBAIJ *)xx->B->data;
1834     y = (Mat_SeqBAIJ *)yy->B->data;
1835     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1836     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1837     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1838   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1839     PetscCall(MatAXPY_Basic(Y, a, X, str));
1840   } else {
1841     Mat       B;
1842     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1843     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1844     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1845     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1846     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1847     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1848     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1849     PetscCall(MatSetType(B, MATMPIBAIJ));
1850     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1851     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1852     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1853     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1854     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1855     PetscCall(MatHeaderMerge(Y, &B));
1856     PetscCall(PetscFree(nnz_d));
1857     PetscCall(PetscFree(nnz_o));
1858   }
1859   PetscFunctionReturn(PETSC_SUCCESS);
1860 }
1861 
1862 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1863 {
1864   PetscFunctionBegin;
1865   if (PetscDefined(USE_COMPLEX)) {
1866     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1867 
1868     PetscCall(MatConjugate_SeqBAIJ(a->A));
1869     PetscCall(MatConjugate_SeqBAIJ(a->B));
1870   }
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1875 {
1876   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(MatRealPart(a->A));
1880   PetscCall(MatRealPart(a->B));
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1885 {
1886   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(MatImaginaryPart(a->A));
1890   PetscCall(MatImaginaryPart(a->B));
1891   PetscFunctionReturn(PETSC_SUCCESS);
1892 }
1893 
1894 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1895 {
1896   IS       iscol_local;
1897   PetscInt csize;
1898 
1899   PetscFunctionBegin;
1900   PetscCall(ISGetLocalSize(iscol, &csize));
1901   if (call == MAT_REUSE_MATRIX) {
1902     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1903     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1904   } else {
1905     PetscCall(ISAllGather(iscol, &iscol_local));
1906   }
1907   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1908   if (call == MAT_INITIAL_MATRIX) {
1909     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1910     PetscCall(ISDestroy(&iscol_local));
1911   }
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 /*
1916   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1917   in local and then by concatenating the local matrices the end result.
1918   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1919   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1920 */
1921 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1922 {
1923   PetscMPIInt  rank, size;
1924   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1925   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1926   Mat          M, Mreuse;
1927   MatScalar   *vwork, *aa;
1928   MPI_Comm     comm;
1929   IS           isrow_new, iscol_new;
1930   Mat_SeqBAIJ *aij;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1934   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1935   PetscCallMPI(MPI_Comm_size(comm, &size));
1936   /* The compression and expansion should be avoided. Doesn't point
1937      out errors, might change the indices, hence buggey */
1938   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1939   if (isrow == iscol) {
1940     iscol_new = isrow_new;
1941     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1942   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1943 
1944   if (call == MAT_REUSE_MATRIX) {
1945     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1946     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1947     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1948   } else {
1949     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1950   }
1951   PetscCall(ISDestroy(&isrow_new));
1952   PetscCall(ISDestroy(&iscol_new));
1953   /*
1954       m - number of local rows
1955       n - number of columns (same on all processors)
1956       rstart - first row in new global matrix generated
1957   */
1958   PetscCall(MatGetBlockSize(mat, &bs));
1959   PetscCall(MatGetSize(Mreuse, &m, &n));
1960   m = m / bs;
1961   n = n / bs;
1962 
1963   if (call == MAT_INITIAL_MATRIX) {
1964     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1965     ii  = aij->i;
1966     jj  = aij->j;
1967 
1968     /*
1969         Determine the number of non-zeros in the diagonal and off-diagonal
1970         portions of the matrix in order to do correct preallocation
1971     */
1972 
1973     /* first get start and end of "diagonal" columns */
1974     if (csize == PETSC_DECIDE) {
1975       PetscCall(ISGetSize(isrow, &mglobal));
1976       if (mglobal == n * bs) { /* square matrix */
1977         nlocal = m;
1978       } else {
1979         nlocal = n / size + ((n % size) > rank);
1980       }
1981     } else {
1982       nlocal = csize / bs;
1983     }
1984     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1985     rstart = rend - nlocal;
1986     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1987 
1988     /* next, compute all the lengths */
1989     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1990     for (i = 0; i < m; i++) {
1991       jend = ii[i + 1] - ii[i];
1992       olen = 0;
1993       dlen = 0;
1994       for (j = 0; j < jend; j++) {
1995         if (*jj < rstart || *jj >= rend) olen++;
1996         else dlen++;
1997         jj++;
1998       }
1999       olens[i] = olen;
2000       dlens[i] = dlen;
2001     }
2002     PetscCall(MatCreate(comm, &M));
2003     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2004     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2005     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2006     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2007     PetscCall(PetscFree2(dlens, olens));
2008   } else {
2009     PetscInt ml, nl;
2010 
2011     M = *newmat;
2012     PetscCall(MatGetLocalSize(M, &ml, &nl));
2013     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2014     PetscCall(MatZeroEntries(M));
2015     /*
2016          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2017        rather than the slower MatSetValues().
2018     */
2019     M->was_assembled = PETSC_TRUE;
2020     M->assembled     = PETSC_FALSE;
2021   }
2022   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2023   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2024   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2025   ii  = aij->i;
2026   jj  = aij->j;
2027   aa  = aij->a;
2028   for (i = 0; i < m; i++) {
2029     row   = rstart / bs + i;
2030     nz    = ii[i + 1] - ii[i];
2031     cwork = jj;
2032     jj    = PetscSafePointerPlusOffset(jj, nz);
2033     vwork = aa;
2034     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2035     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2036   }
2037 
2038   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2039   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2040   *newmat = M;
2041 
2042   /* save submatrix used in processor for next request */
2043   if (call == MAT_INITIAL_MATRIX) {
2044     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2045     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2046   }
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2051 {
2052   MPI_Comm        comm, pcomm;
2053   PetscInt        clocal_size, nrows;
2054   const PetscInt *rows;
2055   PetscMPIInt     size;
2056   IS              crowp, lcolp;
2057 
2058   PetscFunctionBegin;
2059   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2060   /* make a collective version of 'rowp' */
2061   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2062   if (pcomm == comm) {
2063     crowp = rowp;
2064   } else {
2065     PetscCall(ISGetSize(rowp, &nrows));
2066     PetscCall(ISGetIndices(rowp, &rows));
2067     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2068     PetscCall(ISRestoreIndices(rowp, &rows));
2069   }
2070   PetscCall(ISSetPermutation(crowp));
2071   /* make a local version of 'colp' */
2072   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2073   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2074   if (size == 1) {
2075     lcolp = colp;
2076   } else {
2077     PetscCall(ISAllGather(colp, &lcolp));
2078   }
2079   PetscCall(ISSetPermutation(lcolp));
2080   /* now we just get the submatrix */
2081   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2082   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2083   /* clean up */
2084   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2085   if (size > 1) PetscCall(ISDestroy(&lcolp));
2086   PetscFunctionReturn(PETSC_SUCCESS);
2087 }
2088 
2089 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2090 {
2091   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2092   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2093 
2094   PetscFunctionBegin;
2095   if (nghosts) *nghosts = B->nbs;
2096   if (ghosts) *ghosts = baij->garray;
2097   PetscFunctionReturn(PETSC_SUCCESS);
2098 }
2099 
2100 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2101 {
2102   Mat          B;
2103   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2104   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2105   Mat_SeqAIJ  *b;
2106   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2107   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2108   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2109 
2110   PetscFunctionBegin;
2111   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2112   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2113 
2114   /*   Tell every processor the number of nonzeros per row  */
2115   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2116   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2117   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2118   displs = recvcounts + size;
2119   for (i = 0; i < size; i++) {
2120     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2121     displs[i]     = A->rmap->range[i] / bs;
2122   }
2123   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2124   /* Create the sequential matrix of the same type as the local block diagonal  */
2125   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2126   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2127   PetscCall(MatSetType(B, MATSEQAIJ));
2128   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2129   b = (Mat_SeqAIJ *)B->data;
2130 
2131   /*     Copy my part of matrix column indices over  */
2132   sendcount  = ad->nz + bd->nz;
2133   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2134   a_jsendbuf = ad->j;
2135   b_jsendbuf = bd->j;
2136   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2137   cnt        = 0;
2138   for (i = 0; i < n; i++) {
2139     /* put in lower diagonal portion */
2140     m = bd->i[i + 1] - bd->i[i];
2141     while (m > 0) {
2142       /* is it above diagonal (in bd (compressed) numbering) */
2143       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2144       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2145       m--;
2146     }
2147 
2148     /* put in diagonal portion */
2149     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2150 
2151     /* put in upper diagonal portion */
2152     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2153   }
2154   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2155 
2156   /*  Gather all column indices to all processors  */
2157   for (i = 0; i < size; i++) {
2158     recvcounts[i] = 0;
2159     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2160   }
2161   displs[0] = 0;
2162   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2163   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2164   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2165   /* set the b->ilen (length of each row) values */
2166   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2167   /* set the b->i indices */
2168   b->i[0] = 0;
2169   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2170   PetscCall(PetscFree(lens));
2171   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2172   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2173   PetscCall(PetscFree(recvcounts));
2174 
2175   PetscCall(MatPropagateSymmetryOptions(A, B));
2176   *newmat = B;
2177   PetscFunctionReturn(PETSC_SUCCESS);
2178 }
2179 
2180 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2181 {
2182   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2183   Vec          bb1 = NULL;
2184 
2185   PetscFunctionBegin;
2186   if (flag == SOR_APPLY_UPPER) {
2187     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2188     PetscFunctionReturn(PETSC_SUCCESS);
2189   }
2190 
2191   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2192 
2193   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2194     if (flag & SOR_ZERO_INITIAL_GUESS) {
2195       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2196       its--;
2197     }
2198 
2199     while (its--) {
2200       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2201       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2202 
2203       /* update rhs: bb1 = bb - B*x */
2204       PetscCall(VecScale(mat->lvec, -1.0));
2205       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2206 
2207       /* local sweep */
2208       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2209     }
2210   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2211     if (flag & SOR_ZERO_INITIAL_GUESS) {
2212       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2213       its--;
2214     }
2215     while (its--) {
2216       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2217       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2218 
2219       /* update rhs: bb1 = bb - B*x */
2220       PetscCall(VecScale(mat->lvec, -1.0));
2221       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2222 
2223       /* local sweep */
2224       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2225     }
2226   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2227     if (flag & SOR_ZERO_INITIAL_GUESS) {
2228       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2229       its--;
2230     }
2231     while (its--) {
2232       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2233       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2234 
2235       /* update rhs: bb1 = bb - B*x */
2236       PetscCall(VecScale(mat->lvec, -1.0));
2237       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2238 
2239       /* local sweep */
2240       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2241     }
2242   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2243 
2244   PetscCall(VecDestroy(&bb1));
2245   PetscFunctionReturn(PETSC_SUCCESS);
2246 }
2247 
2248 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2249 {
2250   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2251   PetscInt     m, N, i, *garray = aij->garray;
2252   PetscInt     ib, jb, bs = A->rmap->bs;
2253   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2254   MatScalar   *a_val = a_aij->a;
2255   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2256   MatScalar   *b_val = b_aij->a;
2257   PetscReal   *work;
2258 
2259   PetscFunctionBegin;
2260   PetscCall(MatGetSize(A, &m, &N));
2261   PetscCall(PetscCalloc1(N, &work));
2262   if (type == NORM_2) {
2263     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2264       for (jb = 0; jb < bs; jb++) {
2265         for (ib = 0; ib < bs; ib++) {
2266           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2267           a_val++;
2268         }
2269       }
2270     }
2271     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2275           b_val++;
2276         }
2277       }
2278     }
2279   } else if (type == NORM_1) {
2280     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2281       for (jb = 0; jb < bs; jb++) {
2282         for (ib = 0; ib < bs; ib++) {
2283           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2284           a_val++;
2285         }
2286       }
2287     }
2288     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
2291           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2292           b_val++;
2293         }
2294       }
2295     }
2296   } else if (type == NORM_INFINITY) {
2297     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
2300           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2301           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2302           a_val++;
2303         }
2304       }
2305     }
2306     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2307       for (jb = 0; jb < bs; jb++) {
2308         for (ib = 0; ib < bs; ib++) {
2309           int col   = garray[b_aij->j[i]] * bs + jb;
2310           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2311           b_val++;
2312         }
2313       }
2314     }
2315   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2316     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2317       for (jb = 0; jb < bs; jb++) {
2318         for (ib = 0; ib < bs; ib++) {
2319           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2320           a_val++;
2321         }
2322       }
2323     }
2324     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2328           b_val++;
2329         }
2330       }
2331     }
2332   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2333     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2334       for (jb = 0; jb < bs; jb++) {
2335         for (ib = 0; ib < bs; ib++) {
2336           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2337           a_val++;
2338         }
2339       }
2340     }
2341     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2342       for (jb = 0; jb < bs; jb++) {
2343         for (ib = 0; ib < bs; ib++) {
2344           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2345           b_val++;
2346         }
2347       }
2348     }
2349   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2350   if (type == NORM_INFINITY) {
2351     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2352   } else {
2353     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2354   }
2355   PetscCall(PetscFree(work));
2356   if (type == NORM_2) {
2357     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2358   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2359     for (i = 0; i < N; i++) reductions[i] /= m;
2360   }
2361   PetscFunctionReturn(PETSC_SUCCESS);
2362 }
2363 
2364 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2365 {
2366   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2367 
2368   PetscFunctionBegin;
2369   PetscCall(MatInvertBlockDiagonal(a->A, values));
2370   A->factorerrortype             = a->A->factorerrortype;
2371   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2372   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2377 {
2378   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2379   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2380 
2381   PetscFunctionBegin;
2382   if (!Y->preallocated) {
2383     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2384   } else if (!aij->nz) {
2385     PetscInt nonew = aij->nonew;
2386     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2387     aij->nonew = nonew;
2388   }
2389   PetscCall(MatShift_Basic(Y, a));
2390   PetscFunctionReturn(PETSC_SUCCESS);
2391 }
2392 
2393 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2394 {
2395   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2396 
2397   PetscFunctionBegin;
2398   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2399   PetscCall(MatMissingDiagonal(a->A, missing, d));
2400   if (d) {
2401     PetscInt rstart;
2402     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2403     *d += rstart / A->rmap->bs;
2404   }
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2409 {
2410   PetscFunctionBegin;
2411   *a = ((Mat_MPIBAIJ *)A->data)->A;
2412   PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414 
2415 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2416 {
2417   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2418 
2419   PetscFunctionBegin;
2420   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2421   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2422   PetscFunctionReturn(PETSC_SUCCESS);
2423 }
2424 
2425 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2426                                        MatGetRow_MPIBAIJ,
2427                                        MatRestoreRow_MPIBAIJ,
2428                                        MatMult_MPIBAIJ,
2429                                        /* 4*/ MatMultAdd_MPIBAIJ,
2430                                        MatMultTranspose_MPIBAIJ,
2431                                        MatMultTransposeAdd_MPIBAIJ,
2432                                        NULL,
2433                                        NULL,
2434                                        NULL,
2435                                        /*10*/ NULL,
2436                                        NULL,
2437                                        NULL,
2438                                        MatSOR_MPIBAIJ,
2439                                        MatTranspose_MPIBAIJ,
2440                                        /*15*/ MatGetInfo_MPIBAIJ,
2441                                        MatEqual_MPIBAIJ,
2442                                        MatGetDiagonal_MPIBAIJ,
2443                                        MatDiagonalScale_MPIBAIJ,
2444                                        MatNorm_MPIBAIJ,
2445                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2446                                        MatAssemblyEnd_MPIBAIJ,
2447                                        MatSetOption_MPIBAIJ,
2448                                        MatZeroEntries_MPIBAIJ,
2449                                        /*24*/ MatZeroRows_MPIBAIJ,
2450                                        NULL,
2451                                        NULL,
2452                                        NULL,
2453                                        NULL,
2454                                        /*29*/ MatSetUp_MPI_Hash,
2455                                        NULL,
2456                                        NULL,
2457                                        MatGetDiagonalBlock_MPIBAIJ,
2458                                        NULL,
2459                                        /*34*/ MatDuplicate_MPIBAIJ,
2460                                        NULL,
2461                                        NULL,
2462                                        NULL,
2463                                        NULL,
2464                                        /*39*/ MatAXPY_MPIBAIJ,
2465                                        MatCreateSubMatrices_MPIBAIJ,
2466                                        MatIncreaseOverlap_MPIBAIJ,
2467                                        MatGetValues_MPIBAIJ,
2468                                        MatCopy_MPIBAIJ,
2469                                        /*44*/ NULL,
2470                                        MatScale_MPIBAIJ,
2471                                        MatShift_MPIBAIJ,
2472                                        NULL,
2473                                        MatZeroRowsColumns_MPIBAIJ,
2474                                        /*49*/ NULL,
2475                                        NULL,
2476                                        NULL,
2477                                        NULL,
2478                                        NULL,
2479                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2480                                        NULL,
2481                                        MatSetUnfactored_MPIBAIJ,
2482                                        MatPermute_MPIBAIJ,
2483                                        MatSetValuesBlocked_MPIBAIJ,
2484                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2485                                        MatDestroy_MPIBAIJ,
2486                                        MatView_MPIBAIJ,
2487                                        NULL,
2488                                        NULL,
2489                                        /*64*/ NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        NULL,
2494                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                        /*74*/ NULL,
2500                                        MatFDColoringApply_BAIJ,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        /*79*/ NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        MatLoad_MPIBAIJ,
2509                                        /*84*/ NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        NULL,
2513                                        NULL,
2514                                        /*89*/ NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        /*94*/ NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        NULL,
2524                                        /*99*/ NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        MatConjugate_MPIBAIJ,
2528                                        NULL,
2529                                        /*104*/ NULL,
2530                                        MatRealPart_MPIBAIJ,
2531                                        MatImaginaryPart_MPIBAIJ,
2532                                        NULL,
2533                                        NULL,
2534                                        /*109*/ NULL,
2535                                        NULL,
2536                                        NULL,
2537                                        NULL,
2538                                        MatMissingDiagonal_MPIBAIJ,
2539                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2540                                        NULL,
2541                                        MatGetGhosts_MPIBAIJ,
2542                                        NULL,
2543                                        NULL,
2544                                        /*119*/ NULL,
2545                                        NULL,
2546                                        NULL,
2547                                        NULL,
2548                                        MatGetMultiProcBlock_MPIBAIJ,
2549                                        /*124*/ NULL,
2550                                        MatGetColumnReductions_MPIBAIJ,
2551                                        MatInvertBlockDiagonal_MPIBAIJ,
2552                                        NULL,
2553                                        NULL,
2554                                        /*129*/ NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        NULL,
2558                                        NULL,
2559                                        /*134*/ NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        /*139*/ MatSetBlockSizes_Default,
2565                                        NULL,
2566                                        NULL,
2567                                        MatFDColoringSetUp_MPIXAIJ,
2568                                        NULL,
2569                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2570                                        NULL,
2571                                        NULL,
2572                                        NULL,
2573                                        NULL,
2574                                        NULL,
2575                                        /*150*/ NULL,
2576                                        MatEliminateZeros_MPIBAIJ,
2577                                        NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
2582 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2583 {
2584   PetscInt        m, rstart, cstart, cend;
2585   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2586   const PetscInt *JJ          = NULL;
2587   PetscScalar    *values      = NULL;
2588   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2589   PetscBool       nooffprocentries;
2590 
2591   PetscFunctionBegin;
2592   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2593   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2594   PetscCall(PetscLayoutSetUp(B->rmap));
2595   PetscCall(PetscLayoutSetUp(B->cmap));
2596   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2597   m      = B->rmap->n / bs;
2598   rstart = B->rmap->rstart / bs;
2599   cstart = B->cmap->rstart / bs;
2600   cend   = B->cmap->rend / bs;
2601 
2602   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2603   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2604   for (i = 0; i < m; i++) {
2605     nz = ii[i + 1] - ii[i];
2606     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2607     nz_max = PetscMax(nz_max, nz);
2608     dlen   = 0;
2609     olen   = 0;
2610     JJ     = jj + ii[i];
2611     for (j = 0; j < nz; j++) {
2612       if (*JJ < cstart || *JJ >= cend) olen++;
2613       else dlen++;
2614       JJ++;
2615     }
2616     d_nnz[i] = dlen;
2617     o_nnz[i] = olen;
2618   }
2619   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2620   PetscCall(PetscFree2(d_nnz, o_nnz));
2621 
2622   values = (PetscScalar *)V;
2623   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2624   for (i = 0; i < m; i++) {
2625     PetscInt        row   = i + rstart;
2626     PetscInt        ncols = ii[i + 1] - ii[i];
2627     const PetscInt *icols = jj + ii[i];
2628     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2629       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2630       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2631     } else { /* block ordering does not match so we can only insert one block at a time. */
2632       PetscInt j;
2633       for (j = 0; j < ncols; j++) {
2634         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2635         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2636       }
2637     }
2638   }
2639 
2640   if (!V) PetscCall(PetscFree(values));
2641   nooffprocentries    = B->nooffprocentries;
2642   B->nooffprocentries = PETSC_TRUE;
2643   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2644   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2645   B->nooffprocentries = nooffprocentries;
2646 
2647   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2648   PetscFunctionReturn(PETSC_SUCCESS);
2649 }
2650 
2651 /*@C
2652   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654   Collective
2655 
2656   Input Parameters:
2657 + B  - the matrix
2658 . bs - the block size
2659 . i  - the indices into `j` for the start of each local row (starts with zero)
2660 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2661 - v  - optional values in the matrix
2662 
2663   Level: advanced
2664 
2665   Notes:
2666   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2667   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2668   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2669   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2670   block column and the second index is over columns within a block.
2671 
2672   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2673 
2674 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2675 @*/
2676 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2677 {
2678   PetscFunctionBegin;
2679   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2680   PetscValidType(B, 1);
2681   PetscValidLogicalCollectiveInt(B, bs, 2);
2682   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2683   PetscFunctionReturn(PETSC_SUCCESS);
2684 }
2685 
2686 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2687 {
2688   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2689   PetscInt     i;
2690   PetscMPIInt  size;
2691 
2692   PetscFunctionBegin;
2693   if (B->hash_active) {
2694     B->ops[0]      = b->cops;
2695     B->hash_active = PETSC_FALSE;
2696   }
2697   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2698   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2699   PetscCall(PetscLayoutSetUp(B->rmap));
2700   PetscCall(PetscLayoutSetUp(B->cmap));
2701   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2702 
2703   if (d_nnz) {
2704     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2705   }
2706   if (o_nnz) {
2707     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2708   }
2709 
2710   b->bs2 = bs * bs;
2711   b->mbs = B->rmap->n / bs;
2712   b->nbs = B->cmap->n / bs;
2713   b->Mbs = B->rmap->N / bs;
2714   b->Nbs = B->cmap->N / bs;
2715 
2716   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2717   b->rstartbs = B->rmap->rstart / bs;
2718   b->rendbs   = B->rmap->rend / bs;
2719   b->cstartbs = B->cmap->rstart / bs;
2720   b->cendbs   = B->cmap->rend / bs;
2721 
2722 #if defined(PETSC_USE_CTABLE)
2723   PetscCall(PetscHMapIDestroy(&b->colmap));
2724 #else
2725   PetscCall(PetscFree(b->colmap));
2726 #endif
2727   PetscCall(PetscFree(b->garray));
2728   PetscCall(VecDestroy(&b->lvec));
2729   PetscCall(VecScatterDestroy(&b->Mvctx));
2730 
2731   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2732   PetscCall(MatDestroy(&b->B));
2733   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2734   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2735   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2736 
2737   PetscCall(MatDestroy(&b->A));
2738   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2739   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2740   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2741 
2742   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2743   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2744   B->preallocated  = PETSC_TRUE;
2745   B->was_assembled = PETSC_FALSE;
2746   B->assembled     = PETSC_FALSE;
2747   PetscFunctionReturn(PETSC_SUCCESS);
2748 }
2749 
2750 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2751 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2752 
2753 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2754 {
2755   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2756   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2757   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2758   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2759 
2760   PetscFunctionBegin;
2761   PetscCall(PetscMalloc1(M + 1, &ii));
2762   ii[0] = 0;
2763   for (i = 0; i < M; i++) {
2764     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2765     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2766     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2767     /* remove one from count of matrix has diagonal */
2768     for (j = id[i]; j < id[i + 1]; j++) {
2769       if (jd[j] == i) {
2770         ii[i + 1]--;
2771         break;
2772       }
2773     }
2774   }
2775   PetscCall(PetscMalloc1(ii[M], &jj));
2776   cnt = 0;
2777   for (i = 0; i < M; i++) {
2778     for (j = io[i]; j < io[i + 1]; j++) {
2779       if (garray[jo[j]] > rstart) break;
2780       jj[cnt++] = garray[jo[j]];
2781     }
2782     for (k = id[i]; k < id[i + 1]; k++) {
2783       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2784     }
2785     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2786   }
2787   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2788   PetscFunctionReturn(PETSC_SUCCESS);
2789 }
2790 
2791 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2792 
2793 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2794 
2795 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2796 {
2797   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2798   Mat_MPIAIJ  *b;
2799   Mat          B;
2800 
2801   PetscFunctionBegin;
2802   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2803 
2804   if (reuse == MAT_REUSE_MATRIX) {
2805     B = *newmat;
2806   } else {
2807     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2808     PetscCall(MatSetType(B, MATMPIAIJ));
2809     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2810     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2811     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2812     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2813   }
2814   b = (Mat_MPIAIJ *)B->data;
2815 
2816   if (reuse == MAT_REUSE_MATRIX) {
2817     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2818     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2819   } else {
2820     PetscInt   *garray = a->garray;
2821     Mat_SeqAIJ *bB;
2822     PetscInt    bs, nnz;
2823     PetscCall(MatDestroy(&b->A));
2824     PetscCall(MatDestroy(&b->B));
2825     /* just clear out the data structure */
2826     PetscCall(MatDisAssemble_MPIAIJ(B));
2827     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2828     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2829 
2830     /* Global numbering for b->B columns */
2831     bB  = (Mat_SeqAIJ *)b->B->data;
2832     bs  = A->rmap->bs;
2833     nnz = bB->i[A->rmap->n];
2834     for (PetscInt k = 0; k < nnz; k++) {
2835       PetscInt bj = bB->j[k] / bs;
2836       PetscInt br = bB->j[k] % bs;
2837       bB->j[k]    = garray[bj] * bs + br;
2838     }
2839   }
2840   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2841   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2842 
2843   if (reuse == MAT_INPLACE_MATRIX) {
2844     PetscCall(MatHeaderReplace(A, &B));
2845   } else {
2846     *newmat = B;
2847   }
2848   PetscFunctionReturn(PETSC_SUCCESS);
2849 }
2850 
2851 /*MC
2852    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2853 
2854    Options Database Keys:
2855 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2856 . -mat_block_size <bs> - set the blocksize used to store the matrix
2857 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2858 - -mat_use_hash_table <fact> - set hash table factor
2859 
2860    Level: beginner
2861 
2862    Note:
2863     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2864     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2865 
2866 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2867 M*/
2868 
2869 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2870 
2871 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2872 {
2873   Mat_MPIBAIJ *b;
2874   PetscBool    flg = PETSC_FALSE;
2875 
2876   PetscFunctionBegin;
2877   PetscCall(PetscNew(&b));
2878   B->data      = (void *)b;
2879   B->ops[0]    = MatOps_Values;
2880   B->assembled = PETSC_FALSE;
2881 
2882   B->insertmode = NOT_SET_VALUES;
2883   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2884   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2885 
2886   /* build local table of row and column ownerships */
2887   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2888 
2889   /* build cache for off array entries formed */
2890   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2891 
2892   b->donotstash  = PETSC_FALSE;
2893   b->colmap      = NULL;
2894   b->garray      = NULL;
2895   b->roworiented = PETSC_TRUE;
2896 
2897   /* stuff used in block assembly */
2898   b->barray = NULL;
2899 
2900   /* stuff used for matrix vector multiply */
2901   b->lvec  = NULL;
2902   b->Mvctx = NULL;
2903 
2904   /* stuff for MatGetRow() */
2905   b->rowindices   = NULL;
2906   b->rowvalues    = NULL;
2907   b->getrowactive = PETSC_FALSE;
2908 
2909   /* hash table stuff */
2910   b->ht           = NULL;
2911   b->hd           = NULL;
2912   b->ht_size      = 0;
2913   b->ht_flag      = PETSC_FALSE;
2914   b->ht_fact      = 0;
2915   b->ht_total_ct  = 0;
2916   b->ht_insert_ct = 0;
2917 
2918   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2919   b->ijonly = PETSC_FALSE;
2920 
2921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2924 #if defined(PETSC_HAVE_HYPRE)
2925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2926 #endif
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2934   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2935 
2936   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2937   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2938   if (flg) {
2939     PetscReal fact = 1.39;
2940     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2941     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2942     if (fact <= 1.0) fact = 1.39;
2943     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2944     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2945   }
2946   PetscOptionsEnd();
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
2950 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2951 /*MC
2952    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2953 
2954    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2955    and `MATMPIBAIJ` otherwise.
2956 
2957    Options Database Keys:
2958 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2959 
2960   Level: beginner
2961 
2962 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2963 M*/
2964 
2965 /*@C
2966   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2967   (block compressed row).
2968 
2969   Collective
2970 
2971   Input Parameters:
2972 + B     - the matrix
2973 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2974           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2975 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2976            submatrix  (same for all local rows)
2977 . d_nnz - array containing the number of block nonzeros in the various block rows
2978            of the in diagonal portion of the local (possibly different for each block
2979            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2980            set it even if it is zero.
2981 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2982            submatrix (same for all local rows).
2983 - o_nnz - array containing the number of nonzeros in the various block rows of the
2984            off-diagonal portion of the local submatrix (possibly different for
2985            each block row) or `NULL`.
2986 
2987    If the *_nnz parameter is given then the *_nz parameter is ignored
2988 
2989   Options Database Keys:
2990 + -mat_block_size            - size of the blocks to use
2991 - -mat_use_hash_table <fact> - set hash table factor
2992 
2993   Level: intermediate
2994 
2995   Notes:
2996   For good matrix assembly performance
2997   the user should preallocate the matrix storage by setting the parameters
2998   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2999   performance can be increased by more than a factor of 50.
3000 
3001   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3002   than it must be used on all processors that share the object for that argument.
3003 
3004   Storage Information:
3005   For a square global matrix we define each processor's diagonal portion
3006   to be its local rows and the corresponding columns (a square submatrix);
3007   each processor's off-diagonal portion encompasses the remainder of the
3008   local matrix (a rectangular submatrix).
3009 
3010   The user can specify preallocated storage for the diagonal part of
3011   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3012   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3013   memory allocation.  Likewise, specify preallocated storage for the
3014   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3015 
3016   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3017   the figure below we depict these three local rows and all columns (0-11).
3018 
3019 .vb
3020            0 1 2 3 4 5 6 7 8 9 10 11
3021           --------------------------
3022    row 3  |o o o d d d o o o o  o  o
3023    row 4  |o o o d d d o o o o  o  o
3024    row 5  |o o o d d d o o o o  o  o
3025           --------------------------
3026 .ve
3027 
3028   Thus, any entries in the d locations are stored in the d (diagonal)
3029   submatrix, and any entries in the o locations are stored in the
3030   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3031   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3032 
3033   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3034   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3035   In general, for PDE problems in which most nonzeros are near the diagonal,
3036   one expects `d_nz` >> `o_nz`.
3037 
3038   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3039   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3040   You can also run with the option `-info` and look for messages with the string
3041   malloc in them to see if additional memory allocation was needed.
3042 
3043 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3044 @*/
3045 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3046 {
3047   PetscFunctionBegin;
3048   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3049   PetscValidType(B, 1);
3050   PetscValidLogicalCollectiveInt(B, bs, 2);
3051   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3052   PetscFunctionReturn(PETSC_SUCCESS);
3053 }
3054 
3055 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3056 /*@C
3057   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3058   (block compressed row).
3059 
3060   Collective
3061 
3062   Input Parameters:
3063 + comm  - MPI communicator
3064 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3065           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3066 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3067            This value should be the same as the local size used in creating the
3068            y vector for the matrix-vector product y = Ax.
3069 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3070            This value should be the same as the local size used in creating the
3071            x vector for the matrix-vector product y = Ax.
3072 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3073 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3074 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3075            submatrix  (same for all local rows)
3076 . d_nnz - array containing the number of nonzero blocks in the various block rows
3077            of the in diagonal portion of the local (possibly different for each block
3078            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3079            and set it even if it is zero.
3080 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3081            submatrix (same for all local rows).
3082 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3083            off-diagonal portion of the local submatrix (possibly different for
3084            each block row) or NULL.
3085 
3086   Output Parameter:
3087 . A - the matrix
3088 
3089   Options Database Keys:
3090 + -mat_block_size            - size of the blocks to use
3091 - -mat_use_hash_table <fact> - set hash table factor
3092 
3093   Level: intermediate
3094 
3095   Notes:
3096   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3097   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3098   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3099 
3100   For good matrix assembly performance
3101   the user should preallocate the matrix storage by setting the parameters
3102   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3103   performance can be increased by more than a factor of 50.
3104 
3105   If the *_nnz parameter is given then the *_nz parameter is ignored
3106 
3107   A nonzero block is any block that as 1 or more nonzeros in it
3108 
3109   The user MUST specify either the local or global matrix dimensions
3110   (possibly both).
3111 
3112   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3113   than it must be used on all processors that share the object for that argument.
3114 
3115   Storage Information:
3116   For a square global matrix we define each processor's diagonal portion
3117   to be its local rows and the corresponding columns (a square submatrix);
3118   each processor's off-diagonal portion encompasses the remainder of the
3119   local matrix (a rectangular submatrix).
3120 
3121   The user can specify preallocated storage for the diagonal part of
3122   the local submatrix with either d_nz or d_nnz (not both).  Set
3123   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3124   memory allocation.  Likewise, specify preallocated storage for the
3125   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3126 
3127   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3128   the figure below we depict these three local rows and all columns (0-11).
3129 
3130 .vb
3131            0 1 2 3 4 5 6 7 8 9 10 11
3132           --------------------------
3133    row 3  |o o o d d d o o o o  o  o
3134    row 4  |o o o d d d o o o o  o  o
3135    row 5  |o o o d d d o o o o  o  o
3136           --------------------------
3137 .ve
3138 
3139   Thus, any entries in the d locations are stored in the d (diagonal)
3140   submatrix, and any entries in the o locations are stored in the
3141   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3142   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3143 
3144   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3145   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3146   In general, for PDE problems in which most nonzeros are near the diagonal,
3147   one expects `d_nz` >> `o_nz`.
3148 
3149 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3150 @*/
3151 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3152 {
3153   PetscMPIInt size;
3154 
3155   PetscFunctionBegin;
3156   PetscCall(MatCreate(comm, A));
3157   PetscCall(MatSetSizes(*A, m, n, M, N));
3158   PetscCallMPI(MPI_Comm_size(comm, &size));
3159   if (size > 1) {
3160     PetscCall(MatSetType(*A, MATMPIBAIJ));
3161     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3162   } else {
3163     PetscCall(MatSetType(*A, MATSEQBAIJ));
3164     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3165   }
3166   PetscFunctionReturn(PETSC_SUCCESS);
3167 }
3168 
3169 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3170 {
3171   Mat          mat;
3172   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3173   PetscInt     len = 0;
3174 
3175   PetscFunctionBegin;
3176   *newmat = NULL;
3177   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3178   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3179   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3180 
3181   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3182   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3183   if (matin->hash_active) {
3184     PetscCall(MatSetUp(mat));
3185   } else {
3186     mat->factortype   = matin->factortype;
3187     mat->preallocated = PETSC_TRUE;
3188     mat->assembled    = PETSC_TRUE;
3189     mat->insertmode   = NOT_SET_VALUES;
3190 
3191     a             = (Mat_MPIBAIJ *)mat->data;
3192     mat->rmap->bs = matin->rmap->bs;
3193     a->bs2        = oldmat->bs2;
3194     a->mbs        = oldmat->mbs;
3195     a->nbs        = oldmat->nbs;
3196     a->Mbs        = oldmat->Mbs;
3197     a->Nbs        = oldmat->Nbs;
3198 
3199     a->size         = oldmat->size;
3200     a->rank         = oldmat->rank;
3201     a->donotstash   = oldmat->donotstash;
3202     a->roworiented  = oldmat->roworiented;
3203     a->rowindices   = NULL;
3204     a->rowvalues    = NULL;
3205     a->getrowactive = PETSC_FALSE;
3206     a->barray       = NULL;
3207     a->rstartbs     = oldmat->rstartbs;
3208     a->rendbs       = oldmat->rendbs;
3209     a->cstartbs     = oldmat->cstartbs;
3210     a->cendbs       = oldmat->cendbs;
3211 
3212     /* hash table stuff */
3213     a->ht           = NULL;
3214     a->hd           = NULL;
3215     a->ht_size      = 0;
3216     a->ht_flag      = oldmat->ht_flag;
3217     a->ht_fact      = oldmat->ht_fact;
3218     a->ht_total_ct  = 0;
3219     a->ht_insert_ct = 0;
3220 
3221     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3222     if (oldmat->colmap) {
3223 #if defined(PETSC_USE_CTABLE)
3224       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3225 #else
3226       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3227       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3228 #endif
3229     } else a->colmap = NULL;
3230 
3231     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3232       PetscCall(PetscMalloc1(len, &a->garray));
3233       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3234     } else a->garray = NULL;
3235 
3236     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3237     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3238     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3239 
3240     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3241     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3242   }
3243   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3244   *newmat = mat;
3245   PetscFunctionReturn(PETSC_SUCCESS);
3246 }
3247 
3248 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3249 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3250 {
3251   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3252   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3253   PetscScalar *matvals;
3254 
3255   PetscFunctionBegin;
3256   PetscCall(PetscViewerSetUp(viewer));
3257 
3258   /* read in matrix header */
3259   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3260   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3261   M  = header[1];
3262   N  = header[2];
3263   nz = header[3];
3264   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3265   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3266   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3267 
3268   /* set block sizes from the viewer's .info file */
3269   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3270   /* set local sizes if not set already */
3271   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3272   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3273   /* set global sizes if not set already */
3274   if (mat->rmap->N < 0) mat->rmap->N = M;
3275   if (mat->cmap->N < 0) mat->cmap->N = N;
3276   PetscCall(PetscLayoutSetUp(mat->rmap));
3277   PetscCall(PetscLayoutSetUp(mat->cmap));
3278 
3279   /* check if the matrix sizes are correct */
3280   PetscCall(MatGetSize(mat, &rows, &cols));
3281   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3282   PetscCall(MatGetBlockSize(mat, &bs));
3283   PetscCall(MatGetLocalSize(mat, &m, &n));
3284   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3285   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3286   mbs = m / bs;
3287   nbs = n / bs;
3288 
3289   /* read in row lengths and build row indices */
3290   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3291   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3292   rowidxs[0] = 0;
3293   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3294   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3295   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3296 
3297   /* read in column indices and matrix values */
3298   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3299   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3300   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3301 
3302   {                /* preallocate matrix storage */
3303     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3304     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3305     PetscBool  sbaij, done;
3306     PetscInt  *d_nnz, *o_nnz;
3307 
3308     PetscCall(PetscBTCreate(nbs, &bt));
3309     PetscCall(PetscHSetICreate(&ht));
3310     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3311     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3312     for (i = 0; i < mbs; i++) {
3313       PetscCall(PetscBTMemzero(nbs, bt));
3314       PetscCall(PetscHSetIClear(ht));
3315       for (k = 0; k < bs; k++) {
3316         PetscInt row = bs * i + k;
3317         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3318           PetscInt col = colidxs[j];
3319           if (!sbaij || col >= row) {
3320             if (col >= cs && col < ce) {
3321               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3322             } else {
3323               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3324               if (done) o_nnz[i]++;
3325             }
3326           }
3327         }
3328       }
3329     }
3330     PetscCall(PetscBTDestroy(&bt));
3331     PetscCall(PetscHSetIDestroy(&ht));
3332     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3333     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3334     PetscCall(PetscFree2(d_nnz, o_nnz));
3335   }
3336 
3337   /* store matrix values */
3338   for (i = 0; i < m; i++) {
3339     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3340     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3341   }
3342 
3343   PetscCall(PetscFree(rowidxs));
3344   PetscCall(PetscFree2(colidxs, matvals));
3345   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3346   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3347   PetscFunctionReturn(PETSC_SUCCESS);
3348 }
3349 
3350 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3351 {
3352   PetscBool isbinary;
3353 
3354   PetscFunctionBegin;
3355   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3356   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3357   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3358   PetscFunctionReturn(PETSC_SUCCESS);
3359 }
3360 
3361 /*@
3362   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3363 
3364   Input Parameters:
3365 + mat  - the matrix
3366 - fact - factor
3367 
3368   Options Database Key:
3369 . -mat_use_hash_table <fact> - provide the factor
3370 
3371   Level: advanced
3372 
3373 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3374 @*/
3375 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3376 {
3377   PetscFunctionBegin;
3378   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3379   PetscFunctionReturn(PETSC_SUCCESS);
3380 }
3381 
3382 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3383 {
3384   Mat_MPIBAIJ *baij;
3385 
3386   PetscFunctionBegin;
3387   baij          = (Mat_MPIBAIJ *)mat->data;
3388   baij->ht_fact = fact;
3389   PetscFunctionReturn(PETSC_SUCCESS);
3390 }
3391 
3392 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3393 {
3394   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3395   PetscBool    flg;
3396 
3397   PetscFunctionBegin;
3398   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3399   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3400   if (Ad) *Ad = a->A;
3401   if (Ao) *Ao = a->B;
3402   if (colmap) *colmap = a->garray;
3403   PetscFunctionReturn(PETSC_SUCCESS);
3404 }
3405 
3406 /*
3407     Special version for direct calls from Fortran (to eliminate two function call overheads
3408 */
3409 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3410   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3411 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3412   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3413 #endif
3414 
3415 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3416 /*@C
3417   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3418 
3419   Collective
3420 
3421   Input Parameters:
3422 + matin  - the matrix
3423 . min    - number of input rows
3424 . im     - input rows
3425 . nin    - number of input columns
3426 . in     - input columns
3427 . v      - numerical values input
3428 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3429 
3430   Level: advanced
3431 
3432   Developer Notes:
3433   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3434 
3435 .seealso: `Mat`, `MatSetValuesBlocked()`
3436 @*/
3437 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3438 {
3439   /* convert input arguments to C version */
3440   Mat        mat = *matin;
3441   PetscInt   m = *min, n = *nin;
3442   InsertMode addv = *addvin;
3443 
3444   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3445   const MatScalar *value;
3446   MatScalar       *barray      = baij->barray;
3447   PetscBool        roworiented = baij->roworiented;
3448   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3449   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3450   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3451 
3452   PetscFunctionBegin;
3453   /* tasks normally handled by MatSetValuesBlocked() */
3454   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3455   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3456   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3457   if (mat->assembled) {
3458     mat->was_assembled = PETSC_TRUE;
3459     mat->assembled     = PETSC_FALSE;
3460   }
3461   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3462 
3463   if (!barray) {
3464     PetscCall(PetscMalloc1(bs2, &barray));
3465     baij->barray = barray;
3466   }
3467 
3468   if (roworiented) stepval = (n - 1) * bs;
3469   else stepval = (m - 1) * bs;
3470 
3471   for (i = 0; i < m; i++) {
3472     if (im[i] < 0) continue;
3473     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3474     if (im[i] >= rstart && im[i] < rend) {
3475       row = im[i] - rstart;
3476       for (j = 0; j < n; j++) {
3477         /* If NumCol = 1 then a copy is not required */
3478         if ((roworiented) && (n == 1)) {
3479           barray = (MatScalar *)v + i * bs2;
3480         } else if ((!roworiented) && (m == 1)) {
3481           barray = (MatScalar *)v + j * bs2;
3482         } else { /* Here a copy is required */
3483           if (roworiented) {
3484             value = v + i * (stepval + bs) * bs + j * bs;
3485           } else {
3486             value = v + j * (stepval + bs) * bs + i * bs;
3487           }
3488           for (ii = 0; ii < bs; ii++, value += stepval) {
3489             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3490           }
3491           barray -= bs2;
3492         }
3493 
3494         if (in[j] >= cstart && in[j] < cend) {
3495           col = in[j] - cstart;
3496           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3497         } else if (in[j] < 0) {
3498           continue;
3499         } else {
3500           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3501           if (mat->was_assembled) {
3502             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3503 
3504 #if defined(PETSC_USE_DEBUG)
3505   #if defined(PETSC_USE_CTABLE)
3506             {
3507               PetscInt data;
3508               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3509               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3510             }
3511   #else
3512             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3513   #endif
3514 #endif
3515 #if defined(PETSC_USE_CTABLE)
3516             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3517             col = (col - 1) / bs;
3518 #else
3519             col = (baij->colmap[in[j]] - 1) / bs;
3520 #endif
3521             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
3522               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3523               col = in[j];
3524             }
3525           } else col = in[j];
3526           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3527         }
3528       }
3529     } else {
3530       if (!baij->donotstash) {
3531         if (roworiented) {
3532           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3533         } else {
3534           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3535         }
3536       }
3537     }
3538   }
3539 
3540   /* task normally handled by MatSetValuesBlocked() */
3541   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3542   PetscFunctionReturn(PETSC_SUCCESS);
3543 }
3544 
3545 /*@
3546   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3547   CSR format the local rows.
3548 
3549   Collective
3550 
3551   Input Parameters:
3552 + comm - MPI communicator
3553 . bs   - the block size, only a block size of 1 is supported
3554 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3555 . n    - This value should be the same as the local size used in creating the
3556        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3557        calculated if N is given) For square matrices n is almost always m.
3558 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3559 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3560 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3561 . j    - column indices
3562 - a    - matrix values
3563 
3564   Output Parameter:
3565 . mat - the matrix
3566 
3567   Level: intermediate
3568 
3569   Notes:
3570   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3571   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3572   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3573 
3574   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3575   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3576   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3577   with column-major ordering within blocks.
3578 
3579   The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3580 
3581 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3582           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3583 @*/
3584 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3585 {
3586   PetscFunctionBegin;
3587   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3588   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3589   PetscCall(MatCreate(comm, mat));
3590   PetscCall(MatSetSizes(*mat, m, n, M, N));
3591   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3592   PetscCall(MatSetBlockSize(*mat, bs));
3593   PetscCall(MatSetUp(*mat));
3594   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3595   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3596   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3597   PetscFunctionReturn(PETSC_SUCCESS);
3598 }
3599 
3600 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3601 {
3602   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3603   PetscInt    *indx;
3604   PetscScalar *values;
3605 
3606   PetscFunctionBegin;
3607   PetscCall(MatGetSize(inmat, &m, &N));
3608   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3609     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3610     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3611     PetscInt    *bindx, rmax = a->rmax, j;
3612     PetscMPIInt  rank, size;
3613 
3614     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3615     mbs = m / bs;
3616     Nbs = N / cbs;
3617     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3618     nbs = n / cbs;
3619 
3620     PetscCall(PetscMalloc1(rmax, &bindx));
3621     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3622 
3623     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3624     PetscCallMPI(MPI_Comm_rank(comm, &size));
3625     if (rank == size - 1) {
3626       /* Check sum(nbs) = Nbs */
3627       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3628     }
3629 
3630     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3631     for (i = 0; i < mbs; i++) {
3632       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3633       nnz = nnz / bs;
3634       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3635       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3636       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3637     }
3638     PetscCall(PetscFree(bindx));
3639 
3640     PetscCall(MatCreate(comm, outmat));
3641     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3642     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3643     PetscCall(MatSetType(*outmat, MATBAIJ));
3644     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3645     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3646     MatPreallocateEnd(dnz, onz);
3647     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3648   }
3649 
3650   /* numeric phase */
3651   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3652   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3653 
3654   for (i = 0; i < m; i++) {
3655     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3656     Ii = i + rstart;
3657     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3658     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3659   }
3660   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3661   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3662   PetscFunctionReturn(PETSC_SUCCESS);
3663 }
3664