xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) {
12   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
13   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
14   PetscScalar       *va, *vv;
15   Vec                vB, vA;
16   const PetscScalar *vb;
17 
18   PetscFunctionBegin;
19   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
20   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
21 
22   PetscCall(VecGetArrayWrite(vA, &va));
23   if (idx) {
24     for (i = 0; i < m; i++) {
25       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
26     }
27   }
28 
29   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
30   PetscCall(PetscMalloc1(m, &idxb));
31   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
32 
33   PetscCall(VecGetArrayWrite(v, &vv));
34   PetscCall(VecGetArrayRead(vB, &vb));
35   for (i = 0; i < m; i++) {
36     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
37       vv[i] = vb[i];
38       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
39     } else {
40       vv[i] = va[i];
41       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
42     }
43   }
44   PetscCall(VecRestoreArrayWrite(vA, &vv));
45   PetscCall(VecRestoreArrayWrite(vA, &va));
46   PetscCall(VecRestoreArrayRead(vB, &vb));
47   PetscCall(PetscFree(idxb));
48   PetscCall(VecDestroy(&vA));
49   PetscCall(VecDestroy(&vB));
50   PetscFunctionReturn(0);
51 }
52 
53 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) {
54   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
55 
56   PetscFunctionBegin;
57   PetscCall(MatStoreValues(aij->A));
58   PetscCall(MatStoreValues(aij->B));
59   PetscFunctionReturn(0);
60 }
61 
62 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) {
63   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
64 
65   PetscFunctionBegin;
66   PetscCall(MatRetrieveValues(aij->A));
67   PetscCall(MatRetrieveValues(aij->B));
68   PetscFunctionReturn(0);
69 }
70 
71 /*
72      Local utility routine that creates a mapping from the global column
73    number to the local number in the off-diagonal part of the local
74    storage of the matrix.  This is done in a non scalable way since the
75    length of colmap equals the global matrix length.
76 */
77 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) {
78   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
79   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
80   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
85   for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
86 #else
87   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
88   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
89 #endif
90   PetscFunctionReturn(0);
91 }
92 
/*
  MatSetValues_SeqBAIJ_A_Private - Statement macro that inserts/adds the scalar
  `value` at local point position (row, col) of the diagonal block A, growing
  the row's storage if the block is not yet present.

  Relies on locals declared in the enclosing function: rp, ap, bap, brow, bcol,
  ridx, cidx, low, high, t, _i, nrow, rmax, N, plus a, aa, ai, aj, aimax,
  ailen, bs, bs2.  (orow, ocol) are the original global indices, used only in
  error messages.  Stored blocks are laid out with the in-block column striding
  by bs (see the bs*cidx + ridx addressing).
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = aj + ai[brow]; /* block-column indices of this block row */ \
    ap   = aa + bs2 * ai[brow]; /* values of this block row */ \
    rmax = aimax[brow]; /* allocated blocks in this row */ \
    nrow = ailen[brow]; /* used blocks in this row */ \
    bcol = col / bs; /* block column */ \
    ridx = row % bs; /* position inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search down to a short range */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* nonew==1: silently drop new nonzeros */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
132 
/*
  MatSetValues_SeqBAIJ_B_Private - Same as MatSetValues_SeqBAIJ_A_Private but
  operates on the off-diagonal block B (variables b, ba, bi, bj, bimax, bilen
  of the enclosing function).  (orow, ocol) are the original global indices,
  used only in error messages.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = bj + bi[brow]; /* block-column indices of this block row */ \
    ap   = ba + bs2 * bi[brow]; /* values of this block row */ \
    rmax = bimax[brow]; /* allocated blocks in this row */ \
    nrow = bilen[brow]; /* used blocks in this row */ \
    bcol = col / bs; /* block column */ \
    ridx = row % bs; /* position inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search down to a short range */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* nonew==1: silently drop new nonzeros */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
172 
/*
  MatSetValues_MPIBAIJ - Point-indexed insertion for the parallel BAIJ matrix.

  Locally owned rows are inserted directly into the diagonal (baij->A) or
  off-diagonal (baij->B) sequential block via the *_Private macros above;
  off-process rows are stashed for communication at assembly time unless
  donotstash is set.  Negative row or column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  /* scratch locals written by MatSetValues_SeqBAIJ_{A,B}_Private */
  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative columns are ignored */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column to B's compressed local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1; /* colmap stores offsets shifted by +1; 0 means absent */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add point offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash unless forbidden */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
257 
/*
  MatSetValuesBlocked_SeqBAIJ_Inlined - Insert or add ONE dense bs x bs block
  `v` at block position (row, col) of a SeqBAIJ matrix.  (orow, ocol) are the
  global block indices, used only in error messages.

  Stored blocks address element (r,c) as bap[c*bs + r] (see the jj stride of
  bs in the roworiented branches), so a row-oriented input block is
  transposed on the fly while copying.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) { /* binary search down to a short range */
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: update in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* nonew==1: silently drop new blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
333 
334 /*
335     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
336     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
337 */
/*
  MatSetValuesBlocked_MPIBAIJ - Block-indexed insertion: im[]/in[] are global
  BLOCK indices and v holds m*n dense bs x bs blocks.  Each block is routed to
  the diagonal (baij->A) or off-diagonal (baij->B) sequential block, or
  stashed when the block row is owned by another process.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray; /* persistent bs2 scratch block */
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* allocate the scratch block once and keep it on the matrix */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive block rows/cols of v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* alias directly into v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2; /* alias directly into v */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block columns are ignored */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to B's compressed numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* colmap stores point offsets shifted by +1 */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash unless forbidden */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
428 
/* Multiplicative hashing (Knuth): map `key` into [0, size) via the fractional
   part of key * (golden ratio).  `tmp` is a caller-supplied PetscReal scratch
   variable because the macro must remain a single expression. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
  MatSetValues_MPIBAIJ_HT - Point-indexed insertion that locates the target
  bs x bs block through the hash table built by
  MatCreateHashTable_MPIBAIJ_Private() instead of searching the row storage.
  The table is never grown here: a missing entry is an error.  Off-process
  rows are stashed unless donotstash is set.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value; /* HD[idx] points at the stored bs x bs block */
  /* NOTE(review): total_ct/insert_ct start at the stored totals and are added
     back with += at the end, so prior counts appear to be double-counted —
     verify intent of these debug statistics */
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table; key encodes the global block position */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, then wrap around */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
500 
501 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
502   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
503   PetscBool          roworiented = baij->roworiented;
504   PetscInt           i, j, ii, jj, row, col;
505   PetscInt           rstart = baij->rstartbs;
506   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
507   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
508   PetscReal          tmp;
509   MatScalar        **HD = baij->hd, *baij_a;
510   const PetscScalar *v_t, *value;
511   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
512 
513   PetscFunctionBegin;
514   if (roworiented) stepval = (n - 1) * bs;
515   else stepval = (m - 1) * bs;
516 
517   for (i = 0; i < m; i++) {
518     if (PetscDefined(USE_DEBUG)) {
519       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
520       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
521     }
522     row = im[i];
523     v_t = v + i * nbs2;
524     if (row >= rstart && row < rend) {
525       for (j = 0; j < n; j++) {
526         col = in[j];
527 
528         /* Look up into the Hash Table */
529         key = row * Nbs + col + 1;
530         h1  = HASH(size, key, tmp);
531 
532         idx = h1;
533         if (PetscDefined(USE_DEBUG)) {
534           total_ct++;
535           insert_ct++;
536           if (HT[idx] != key) {
537             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
538               ;
539             if (idx == size) {
540               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
541                 ;
542               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
543             }
544           }
545         } else if (HT[idx] != key) {
546           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
547             ;
548           if (idx == size) {
549             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
550               ;
551             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
552           }
553         }
554         baij_a = HD[idx];
555         if (roworiented) {
556           /*value = v + i*(stepval+bs)*bs + j*bs;*/
557           /* value = v + (i*(stepval+bs)+j)*bs; */
558           value = v_t;
559           v_t += bs;
560           if (addv == ADD_VALUES) {
561             for (ii = 0; ii < bs; ii++, value += stepval) {
562               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
563             }
564           } else {
565             for (ii = 0; ii < bs; ii++, value += stepval) {
566               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
567             }
568           }
569         } else {
570           value = v + j * (stepval + bs) * bs + i * bs;
571           if (addv == ADD_VALUES) {
572             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
573               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
574             }
575           } else {
576             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
577               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
578             }
579           }
580         }
581       }
582     } else {
583       if (!baij->donotstash) {
584         if (roworiented) {
585           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
586         } else {
587           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
588         }
589       }
590     }
591   }
592   if (PetscDefined(USE_DEBUG)) {
593     baij->ht_total_ct += total_ct;
594     baij->ht_insert_ct += insert_ct;
595   }
596   PetscFunctionReturn(0);
597 }
598 
/*
  MatGetValues_MPIBAIJ - Retrieve entries at global POINT indices; only
  locally owned rows are supported (off-process rows raise an error).
  Requested entries whose block column is not present in the local
  off-diagonal pattern are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* off-diagonal block: translate via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--; /* colmap stores point offsets shifted by +1; -1 means absent */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent block column (or stale colmap entry): the value is zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs; /* local point column inside B */
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
635 
636 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) {
637   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
638   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
639   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
640   PetscReal    sum = 0.0;
641   MatScalar   *v;
642 
643   PetscFunctionBegin;
644   if (baij->size == 1) {
645     PetscCall(MatNorm(baij->A, type, nrm));
646   } else {
647     if (type == NORM_FROBENIUS) {
648       v  = amat->a;
649       nz = amat->nz * bs2;
650       for (i = 0; i < nz; i++) {
651         sum += PetscRealPart(PetscConj(*v) * (*v));
652         v++;
653       }
654       v  = bmat->a;
655       nz = bmat->nz * bs2;
656       for (i = 0; i < nz; i++) {
657         sum += PetscRealPart(PetscConj(*v) * (*v));
658         v++;
659       }
660       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
661       *nrm = PetscSqrtReal(*nrm);
662     } else if (type == NORM_1) { /* max column sum */
663       PetscReal *tmp, *tmp2;
664       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
665       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
666       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
667       v  = amat->a;
668       jj = amat->j;
669       for (i = 0; i < amat->nz; i++) {
670         for (j = 0; j < bs; j++) {
671           col = bs * (cstart + *jj) + j; /* column index */
672           for (row = 0; row < bs; row++) {
673             tmp[col] += PetscAbsScalar(*v);
674             v++;
675           }
676         }
677         jj++;
678       }
679       v  = bmat->a;
680       jj = bmat->j;
681       for (i = 0; i < bmat->nz; i++) {
682         for (j = 0; j < bs; j++) {
683           col = bs * garray[*jj] + j;
684           for (row = 0; row < bs; row++) {
685             tmp[col] += PetscAbsScalar(*v);
686             v++;
687           }
688         }
689         jj++;
690       }
691       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
692       *nrm = 0.0;
693       for (j = 0; j < mat->cmap->N; j++) {
694         if (tmp2[j] > *nrm) *nrm = tmp2[j];
695       }
696       PetscCall(PetscFree(tmp));
697       PetscCall(PetscFree(tmp2));
698     } else if (type == NORM_INFINITY) { /* max row sum */
699       PetscReal *sums;
700       PetscCall(PetscMalloc1(bs, &sums));
701       sum = 0.0;
702       for (j = 0; j < amat->mbs; j++) {
703         for (row = 0; row < bs; row++) sums[row] = 0.0;
704         v  = amat->a + bs2 * amat->i[j];
705         nz = amat->i[j + 1] - amat->i[j];
706         for (i = 0; i < nz; i++) {
707           for (col = 0; col < bs; col++) {
708             for (row = 0; row < bs; row++) {
709               sums[row] += PetscAbsScalar(*v);
710               v++;
711             }
712           }
713         }
714         v  = bmat->a + bs2 * bmat->i[j];
715         nz = bmat->i[j + 1] - bmat->i[j];
716         for (i = 0; i < nz; i++) {
717           for (col = 0; col < bs; col++) {
718             for (row = 0; row < bs; row++) {
719               sums[row] += PetscAbsScalar(*v);
720               v++;
721             }
722           }
723         }
724         for (row = 0; row < bs; row++) {
725           if (sums[row] > sum) sum = sums[row];
726         }
727       }
728       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
729       PetscCall(PetscFree(sums));
730     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 /*
736   Creates the hash table, and sets the table
737   This table is created only once.
738   If new entried need to be added to the matrix
739   then the hash table has to be destroyed and
740   recreated.
741 */
/*
  MatCreateHashTable_MPIBAIJ_Private - Builds an open-addressed (linear-probing)
  hash table that maps each stored block's global (block-row, block-col) pair to
  the address of its bs x bs value block in either the diagonal (A) or
  off-diagonal (B) sequential matrix.

  Input Parameters:
+ mat    - the MPIBAIJ matrix whose current nonzero blocks are to be indexed
- factor - oversizing factor; table size = factor * (number of local nonzero blocks)

  Notes:
  The key is row*Nbs + col + 1; the +1 keeps 0 free to mean "empty slot".
  Collisions are resolved by scanning forward (mod ht_size) to the next free slot.
  A no-op if the table already exists.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* ct: total collisions, max: longest probe sequence */
#endif

  PetscFunctionBegin;
  /* table already built; nothing to do */
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc: HT[] == 0 marks empty slots) */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A: diagonal block uses local block-column offset by cstart */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probe from h1 until a free slot is found */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2; /* address of this bs*bs block */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B: off-diagonal block columns are mapped to global via garray */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary: average probes per occupied slot and worst-case probe length */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}
824 
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) {
826   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
827   PetscInt     nstash, reallocs;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
831 
832   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
833   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
834   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
835   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
836   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
837   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
838   PetscFunctionReturn(0);
839 }
840 
/*
  MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the stashed off-process
  entries started in MatAssemblyBegin_MPIBAIJ(), assembles the local A and B
  blocks, handles collective disassembly, optionally installs the hash-table
  MatSetValues paths, and updates the parallel nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* receive point-stash messages until every sender is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* note: values advance by bs2 per block entry */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the callers' orientation flags */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly sets up the scatter for the off-diagonal block */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the hash-table SetValues paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* drop the MatGetRow() work buffers; they are re-created on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
948 
949 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
950 #include <petscdraw.h>
/*
  MatView_MPIBAIJ_ASCIIorDraworSocket - Viewing for ASCII, draw and socket
  viewers. Info-style ASCII formats are printed per-rank; otherwise the whole
  matrix is gathered onto rank 0 as a temporary MPIBAIJ and viewed there.
  Collective on the matrix's communicator.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank nonzero/memory statistics, synchronized across ranks */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-matrix info */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* skip drawing entirely when the draw device is the null device */
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global point-row indices of a block row */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one point-column at a time; 'a' walks the block values */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps the compressed off-diagonal block column to a global one */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* the gathered copy lives entirely in rank 0's diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1071 
1072 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  MatView_MPIBAIJ_Binary - Writes the matrix in PETSc's binary format:
  header (classid, M, N, global nz), then per-row lengths, then column
  indices, then values. Column indices of each point row are emitted in
  globally increasing order by interleaving the off-diagonal block B
  (columns before the diagonal block), the diagonal block A, then the
  remainder of B. Collective.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point-entry count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3] */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths: each of the bs point rows of a block row
     has the same number of entries */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices (global, increasing within each row) */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B entries whose global block column precedes the diagonal block */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* diagonal-block entries, shifted to global by cs */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining B entries (after the diagonal block) */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same interleaved order;
     blocks are stored column-major, hence the bs*(bs*jb+l)+k indexing */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
1147 
1148 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) {
1149   PetscBool iascii, isdraw, issocket, isbinary;
1150 
1151   PetscFunctionBegin;
1152   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1153   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1154   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1155   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1156   if (iascii || isdraw || issocket) {
1157     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1158   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1159   PetscFunctionReturn(0);
1160 }
1161 
/*
  MatDestroy_MPIBAIJ - Releases all storage owned by an MPIBAIJ matrix:
  the stashes, the sequential A and B blocks, the column map, the scatter
  context, work buffers and the hash table, then detaches the composed
  methods registered at creation.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* the column map is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht)); /* hash table data and keys */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* remove the composed type-specific methods so the object can be retyped */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}
1203 
1204 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) {
1205   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1206   PetscInt     nt;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(VecGetLocalSize(xx, &nt));
1210   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1211   PetscCall(VecGetLocalSize(yy, &nt));
1212   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy");
1213   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1214   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1215   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1216   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1217   PetscFunctionReturn(0);
1218 }
1219 
/*
  MatMultAdd_MPIBAIJ - Computes zz = yy + A*xx. The scatter of off-process
  entries of xx is overlapped with the diagonal-block product; statement
  order is therefore significant.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* begin moving off-process entries of xx into a->lvec */
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* zz = yy + (diagonal block)*xx while the scatter is in flight */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* zz += (off-diagonal block)*lvec */
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}
1230 
/*
  MatMultTranspose_MPIBAIJ - Computes yy = A^T * xx. Both local transpose
  products are formed first; the off-process partial results in a->lvec are
  then summed into yy with a reverse ADD_VALUES scatter.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx (contributions owned by other ranks) */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part: yy = A^T * xx */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together (reverse scatter accumulates lvec into yy) */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1244 
/*
  MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T * xx, in the same
  pattern as MatMultTranspose: local products first, then a reverse
  ADD_VALUES scatter folds the off-process contributions into zz.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part: zz = yy + A^T * xx */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1258 
1259 /*
1260   This only works correctly for square matrices where the subblock A->A is the
1261    diagonal block
1262 */
1263 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) {
1264   PetscFunctionBegin;
1265   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1266   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272 
1273   PetscFunctionBegin;
1274   PetscCall(MatScale(a->A, aa));
1275   PetscCall(MatScale(a->B, aa));
1276   PetscFunctionReturn(0);
1277 }
1278 
/*
  MatGetRow_MPIBAIJ - Returns one locally-owned (global point) row of the
  matrix as global column indices and values, merged from the diagonal (A)
  and off-diagonal (B) blocks in increasing global column order.

  Notes:
  Work buffers sized for the longest local row are allocated lazily and
  reused; MatRestoreRow_MPIBAIJ() must be called before the next MatGetRow().
  Only rows in [rstart, rend) may be requested.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* tmp = number of nonzero blocks in block row i (A part + B part) */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local point-row index */

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = index of first B entry whose global block column is >= cstart */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested; find imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1360 
1361 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1362   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1363 
1364   PetscFunctionBegin;
1365   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1366   baij->getrowactive = PETSC_FALSE;
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) {
1371   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1372 
1373   PetscFunctionBegin;
1374   PetscCall(MatZeroEntries(l->A));
1375   PetscCall(MatZeroEntries(l->B));
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1380   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1381   Mat            A = a->A, B = a->B;
1382   PetscLogDouble isend[5], irecv[5];
1383 
1384   PetscFunctionBegin;
1385   info->block_size = (PetscReal)matin->rmap->bs;
1386 
1387   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1388 
1389   isend[0] = info->nz_used;
1390   isend[1] = info->nz_allocated;
1391   isend[2] = info->nz_unneeded;
1392   isend[3] = info->memory;
1393   isend[4] = info->mallocs;
1394 
1395   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1396 
1397   isend[0] += info->nz_used;
1398   isend[1] += info->nz_allocated;
1399   isend[2] += info->nz_unneeded;
1400   isend[3] += info->memory;
1401   isend[4] += info->mallocs;
1402 
1403   if (flag == MAT_LOCAL) {
1404     info->nz_used      = isend[0];
1405     info->nz_allocated = isend[1];
1406     info->nz_unneeded  = isend[2];
1407     info->memory       = isend[3];
1408     info->mallocs      = isend[4];
1409   } else if (flag == MAT_GLOBAL_MAX) {
1410     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1411 
1412     info->nz_used      = irecv[0];
1413     info->nz_allocated = irecv[1];
1414     info->nz_unneeded  = irecv[2];
1415     info->memory       = irecv[3];
1416     info->mallocs      = irecv[4];
1417   } else if (flag == MAT_GLOBAL_SUM) {
1418     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1419 
1420     info->nz_used      = irecv[0];
1421     info->nz_allocated = irecv[1];
1422     info->nz_unneeded  = irecv[2];
1423     info->memory       = irecv[3];
1424     info->mallocs      = irecv[4];
1425   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1426   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1427   info->fill_ratio_needed = 0;
1428   info->factor_mallocs    = 0;
1429   PetscFunctionReturn(0);
1430 }
1431 
/*
  MatSetOption_MPIBAIJ - Sets an option on the parallel matrix. Most options
  are forwarded to both sequential blocks; some only record state on the
  parallel object, and symmetry-type options are accepted as no-ops here.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply propagate to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded on the parallel object too, then propagated */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built at final assembly with this default oversizing factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1473 
/*
  MatTranspose_MPIBAIJ - Forms the transpose by inserting each bs x bs block
  of A and B, one point-column at a time, into a new (or reused) matrix with
  row and column indices swapped, then assembling.

  Input Parameters:
+ A     - the matrix to transpose
- reuse - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX

  Output Parameter:
. matout - the transpose (merged back into A for MAT_INPLACE_MATRIX)
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the target with swapped row/column layouts */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals holds the bs consecutive global point-row indices of a block row */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      /* insert each point-column of the block as a row of B (transposed) */
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps compressed off-diagonal block columns to global ones */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* in-place transpose replaces A's guts with B's */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}
1539 
1540 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) {
1541   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1542   Mat          a = baij->A, b = baij->B;
1543   PetscInt     s1, s2, s3;
1544 
1545   PetscFunctionBegin;
1546   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1547   if (rr) {
1548     PetscCall(VecGetLocalSize(rr, &s1));
1549     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1550     /* Overlap communication with computation. */
1551     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1552   }
1553   if (ll) {
1554     PetscCall(VecGetLocalSize(ll, &s1));
1555     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1556     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1557   }
1558   /* scale  the diagonal block */
1559   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1560 
1561   if (rr) {
1562     /* Do a scatter end and then right scale the off-diagonal block */
1563     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1564     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1565   }
1566   PetscFunctionReturn(0);
1567 }
1568 
/* Zero the listed global rows, optionally placing diag on the diagonal; if x
   and b are both given, b is fixed so the zeroed rows satisfy b[i] = diag*x[i].
   Rows owned by other processes are forwarded by MatZeroRowsMapLocal_Private(),
   which returns the locally owned ones (local numbering) in lrows[]. */
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong; /* do row and column layouts match, so the diagonal exists locally? */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent layouts: the SeqBAIJ kernel can set the diagonal entries itself */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert the diagonal entries one by
       one — this requires that new nonzero locations be allowed */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* back to global numbering */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1625 
/* Zero the listed global rows AND the matching columns, placing diag on the
   diagonal; if x and b are given, b is adjusted so the retained equations stay
   consistent with x's values in the zeroed unknowns.  The diagonal block is
   delegated to MatZeroRowsColumns(); for the off-diagonal block a 0/1 mask of
   zeroed rows is scattered so every process can clear its ghost columns. */
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0; /* p caches the last owner rank found */
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask; /* owned / ghosted indicator of zeroed rows */
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* mark zeroed rows */
  PetscCall(VecRestoreArray(xmask, &bb));
  /* ghost the mask so each process learns which of its ghost columns are zeroed */
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost x too, to fold the cleared columns' contribution into b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* point entries in this point row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      /* successive entries of one point row sit bs apart in the block storage */
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs; /* block row containing point row i */
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k; /* point column in ghosted (lvec) numbering */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the known contribution to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1720 
1721 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) {
1722   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1723 
1724   PetscFunctionBegin;
1725   PetscCall(MatSetUnfactored(a->A));
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1730 
1731 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) {
1732   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1733   Mat          a, b, c, d;
1734   PetscBool    flg;
1735 
1736   PetscFunctionBegin;
1737   a = matA->A;
1738   b = matA->B;
1739   c = matB->A;
1740   d = matB->B;
1741 
1742   PetscCall(MatEqual(a, c, &flg));
1743   if (flg) PetscCall(MatEqual(b, d, &flg));
1744   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) {
1749   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1750   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1751 
1752   PetscFunctionBegin;
1753   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1754   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1755     PetscCall(MatCopy_Basic(A, B, str));
1756   } else {
1757     PetscCall(MatCopy(a->A, b->A, str));
1758     PetscCall(MatCopy(a->B, b->B, str));
1759   }
1760   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1761   PetscFunctionReturn(0);
1762 }
1763 
/* Default MatSetUp(): preallocate with the matrix's current block size and
   PETSC_DEFAULT per-row estimates (used when the caller gave no preallocation). */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
1769 
/* Compute per-block-row nonzero counts of the merged pattern of Y and X, for
   preallocating Y + a*X when the patterns differ.  Y and X here are the
   sequential off-diagonal blocks of MPIBAIJ matrices (see the call site in
   MatAXPY_MPIBAIJ), so Y->rmap->N is a local size; yltog/xltog translate their
   compressed column indices to global block columns.  Results go in nnz[]. */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs; /* number of block rows */
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  /* merge the two row patterns (in global numbering) and count per row */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}
1779 
/* Y = a*X + Y for MPIBAIJ.

   SAME_NONZERO_PATTERN   : flat BLAS axpy over the stored scalars of both blocks
   SUBSET_NONZERO_PATTERN : generic MatAXPY_Basic() path
   otherwise              : preallocate a matrix with the merged pattern, add into
                            it, and swap it into Y via MatHeaderMerge() */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs; /* scalars per block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: identical patterns, so add value arrays directly */
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* and the same for the off-diagonal blocks */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    /* per-block-row counts of the merged diagonal / off-diagonal patterns */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* Y takes over B's data; B is consumed */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1821 
1822 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) {
1823   PetscFunctionBegin;
1824   if (PetscDefined(USE_COMPLEX)) {
1825     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1826 
1827     PetscCall(MatConjugate_SeqBAIJ(a->A));
1828     PetscCall(MatConjugate_SeqBAIJ(a->B));
1829   }
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) {
1834   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1835 
1836   PetscFunctionBegin;
1837   PetscCall(MatRealPart(a->A));
1838   PetscCall(MatRealPart(a->B));
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) {
1843   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1844 
1845   PetscFunctionBegin;
1846   PetscCall(MatImaginaryPart(a->A));
1847   PetscCall(MatImaginaryPart(a->B));
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
1852   IS       iscol_local;
1853   PetscInt csize;
1854 
1855   PetscFunctionBegin;
1856   PetscCall(ISGetLocalSize(iscol, &csize));
1857   if (call == MAT_REUSE_MATRIX) {
1858     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1859     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1860   } else {
1861     PetscCall(ISAllGather(iscol, &iscol_local));
1862   }
1863   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1864   if (call == MAT_INITIAL_MATRIX) {
1865     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1866     PetscCall(ISDestroy(&iscol_local));
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 /*
1872   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1873   in local and then by concatenating the local matrices the end result.
1874   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1875   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1876 */
/* Form the parallel submatrix mat(isrow, iscol): first extract a sequential
   submatrix on each process (Mreuse), then concatenate the pieces into the
   parallel result M.  csize is the requested local column size of the result
   (PETSC_DECIDE allowed); with MAT_REUSE_MATRIX the "SubMatrix" object composed
   by an earlier MAT_INITIAL_MATRIX call is reused. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new; /* compressed (block) versions of the index sets */
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* convert point sizes to block counts */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible, extras on the low ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal; /* this process owns block columns [rstart, rend) */
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* this routine serves both BAIJ and SBAIJ; the preallocation call that does
       not match M's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* blocks are fed to MatSetValuesBlocked in their stored (column-oriented) layout */
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i; /* global block row of local block row i */
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2001 
2002 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) {
2003   MPI_Comm        comm, pcomm;
2004   PetscInt        clocal_size, nrows;
2005   const PetscInt *rows;
2006   PetscMPIInt     size;
2007   IS              crowp, lcolp;
2008 
2009   PetscFunctionBegin;
2010   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2011   /* make a collective version of 'rowp' */
2012   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2013   if (pcomm == comm) {
2014     crowp = rowp;
2015   } else {
2016     PetscCall(ISGetSize(rowp, &nrows));
2017     PetscCall(ISGetIndices(rowp, &rows));
2018     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2019     PetscCall(ISRestoreIndices(rowp, &rows));
2020   }
2021   PetscCall(ISSetPermutation(crowp));
2022   /* make a local version of 'colp' */
2023   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2024   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2025   if (size == 1) {
2026     lcolp = colp;
2027   } else {
2028     PetscCall(ISAllGather(colp, &lcolp));
2029   }
2030   PetscCall(ISSetPermutation(lcolp));
2031   /* now we just get the submatrix */
2032   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2033   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2034   /* clean up */
2035   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2036   if (size > 1) PetscCall(ISDestroy(&lcolp));
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
2041   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2042   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2043 
2044   PetscFunctionBegin;
2045   if (nghosts) *nghosts = B->nbs;
2046   if (ghosts) *ghosts = baij->garray;
2047   PetscFunctionReturn(0);
2048 }
2049 
/* Gather the (block-level) nonzero structure of the whole parallel matrix onto
   every process as a sequential AIJ matrix of size (M/bs) x (N/bs); numerical
   values are NOT set.  Each process builds its rows directly into B's j array
   and the indices are exchanged with in-place Allgatherv calls. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) {
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  /* my block rows: diagonal-block count plus off-diagonal-block count */
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* second half of the same allocation */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* write my rows directly into B's j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs; /* my number of block rows */
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* merge so each row's columns come out in increasing global order:
       off-diagonal columns left of the diagonal block, the diagonal block, the rest */
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}
2138 
/* SOR / Gauss-Seidel relaxation for MPIBAIJ.  Only the "local" variants are
   supported: each outer iteration scatters the current solution into the ghost
   vector, forms bb1 = bb - B*x (moving the off-diagonal contribution to the
   right-hand side), and runs the sequential SOR kernel on the diagonal block.
   its = outer (parallel) iterations, lits = inner (local) iterations. */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL; /* modified right-hand side; only allocated when needed */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* `~flag & SOR_ZERO_INITIAL_GUESS` is nonzero iff the zero-initial-guess bit
     is NOT set; bb1 is needed then, or whenever more than one outer sweep runs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the first sweep needs no ghost values (B*x = 0) */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2205 
2206 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
2207   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2208   PetscInt     m, N, i, *garray = aij->garray;
2209   PetscInt     ib, jb, bs = A->rmap->bs;
2210   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2211   MatScalar   *a_val = a_aij->a;
2212   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2213   MatScalar   *b_val = b_aij->a;
2214   PetscReal   *work;
2215 
2216   PetscFunctionBegin;
2217   PetscCall(MatGetSize(A, &m, &N));
2218   PetscCall(PetscCalloc1(N, &work));
2219   if (type == NORM_2) {
2220     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2221       for (jb = 0; jb < bs; jb++) {
2222         for (ib = 0; ib < bs; ib++) {
2223           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2224           a_val++;
2225         }
2226       }
2227     }
2228     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2229       for (jb = 0; jb < bs; jb++) {
2230         for (ib = 0; ib < bs; ib++) {
2231           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2232           b_val++;
2233         }
2234       }
2235     }
2236   } else if (type == NORM_1) {
2237     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2238       for (jb = 0; jb < bs; jb++) {
2239         for (ib = 0; ib < bs; ib++) {
2240           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2241           a_val++;
2242         }
2243       }
2244     }
2245     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2246       for (jb = 0; jb < bs; jb++) {
2247         for (ib = 0; ib < bs; ib++) {
2248           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2249           b_val++;
2250         }
2251       }
2252     }
2253   } else if (type == NORM_INFINITY) {
2254     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2255       for (jb = 0; jb < bs; jb++) {
2256         for (ib = 0; ib < bs; ib++) {
2257           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2258           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2259           a_val++;
2260         }
2261       }
2262     }
2263     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2264       for (jb = 0; jb < bs; jb++) {
2265         for (ib = 0; ib < bs; ib++) {
2266           int col   = garray[b_aij->j[i]] * bs + jb;
2267           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2268           b_val++;
2269         }
2270       }
2271     }
2272   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2273     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2274       for (jb = 0; jb < bs; jb++) {
2275         for (ib = 0; ib < bs; ib++) {
2276           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2277           a_val++;
2278         }
2279       }
2280     }
2281     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2282       for (jb = 0; jb < bs; jb++) {
2283         for (ib = 0; ib < bs; ib++) {
2284           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2285           b_val++;
2286         }
2287       }
2288     }
2289   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2290     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2291       for (jb = 0; jb < bs; jb++) {
2292         for (ib = 0; ib < bs; ib++) {
2293           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2294           a_val++;
2295         }
2296       }
2297     }
2298     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2299       for (jb = 0; jb < bs; jb++) {
2300         for (ib = 0; ib < bs; ib++) {
2301           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2302           b_val++;
2303         }
2304       }
2305     }
2306   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2307   if (type == NORM_INFINITY) {
2308     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2309   } else {
2310     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2311   }
2312   PetscCall(PetscFree(work));
2313   if (type == NORM_2) {
2314     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2315   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2316     for (i = 0; i < N; i++) reductions[i] /= m;
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) {
2322   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2323 
2324   PetscFunctionBegin;
2325   PetscCall(MatInvertBlockDiagonal(a->A, values));
2326   A->factorerrortype             = a->A->factorerrortype;
2327   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2328   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) {
2333   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2334   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2335 
2336   PetscFunctionBegin;
2337   if (!Y->preallocated) {
2338     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2339   } else if (!aij->nz) {
2340     PetscInt nonew = aij->nonew;
2341     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2342     aij->nonew = nonew;
2343   }
2344   PetscCall(MatShift_Basic(Y, a));
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
2349   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2350 
2351   PetscFunctionBegin;
2352   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2353   PetscCall(MatMissingDiagonal(a->A, missing, d));
2354   if (d) {
2355     PetscInt rstart;
2356     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2357     *d += rstart / A->rmap->bs;
2358   }
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) {
2363   PetscFunctionBegin;
2364   *a = ((Mat_MPIBAIJ *)A->data)->A;
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIBAIJ. Entries are positional: the slot numbers in
   the comments below are the operation indices in struct _MatOps (petsc/private/matimpl.h),
   so entries must never be reordered. NULL slots fall back to default/unsupported handling. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};
2520 
2521 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2522 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2523 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Implementation of MatMPIBAIJSetPreallocationCSR():
  preallocates the matrix from a local block-CSR structure (ii, jj) and, if V is given,
  inserts the block values, then assembles the matrix.

  ii/jj are in block units; jj holds global block column indices. V (when non-NULL) holds
  bs*bs scalars per block, whose internal ordering follows the matrix's roworiented option.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* ownership bounds converted to block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  /* First pass: count, per block row, how many blocks fall in the diagonal part
     (columns in [cstart,cend)) versus the off-diagonal part, to preallocate exactly */
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row; sizes the zero buffer used when V is NULL */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Second pass: insert values; a zero-filled scratch buffer stands in when V is NULL */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* All entries above are locally owned, so skip the off-process stash exchange
     during this assembly; restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR structure is exact, so any new nonzero location later is an error */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2591 
2592 /*@C
2593    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2594 
2595    Collective
2596 
2597    Input Parameters:
2598 +  B - the matrix
2599 .  bs - the block size
2600 .  i - the indices into j for the start of each local row (starts with zero)
2601 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2602 -  v - optional values in the matrix
2603 
2604    Level: advanced
2605 
2606    Notes:
2607     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2608    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2609    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2610    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2611    block column and the second index is over columns within a block.
2612 
2613    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2614 
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2616 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the implementation composed on the matrix type (a no-op if none is registered) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2625 
2626 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
2627   Mat_MPIBAIJ *b;
2628   PetscInt     i;
2629   PetscMPIInt  size;
2630 
2631   PetscFunctionBegin;
2632   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2633   PetscCall(PetscLayoutSetUp(B->rmap));
2634   PetscCall(PetscLayoutSetUp(B->cmap));
2635   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2636 
2637   if (d_nnz) {
2638     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2639   }
2640   if (o_nnz) {
2641     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2642   }
2643 
2644   b      = (Mat_MPIBAIJ *)B->data;
2645   b->bs2 = bs * bs;
2646   b->mbs = B->rmap->n / bs;
2647   b->nbs = B->cmap->n / bs;
2648   b->Mbs = B->rmap->N / bs;
2649   b->Nbs = B->cmap->N / bs;
2650 
2651   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2652   b->rstartbs = B->rmap->rstart / bs;
2653   b->rendbs   = B->rmap->rend / bs;
2654   b->cstartbs = B->cmap->rstart / bs;
2655   b->cendbs   = B->cmap->rend / bs;
2656 
2657 #if defined(PETSC_USE_CTABLE)
2658   PetscCall(PetscTableDestroy(&b->colmap));
2659 #else
2660   PetscCall(PetscFree(b->colmap));
2661 #endif
2662   PetscCall(PetscFree(b->garray));
2663   PetscCall(VecDestroy(&b->lvec));
2664   PetscCall(VecScatterDestroy(&b->Mvctx));
2665 
2666   /* Because the B will have been resized we simply destroy it and create a new one each time */
2667   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2668   PetscCall(MatDestroy(&b->B));
2669   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2670   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2671   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2672 
2673   if (!B->preallocated) {
2674     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2675     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2676     PetscCall(MatSetType(b->A, MATSEQBAIJ));
2677     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2678   }
2679 
2680   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2681   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2682   B->preallocated  = PETSC_TRUE;
2683   B->was_assembled = PETSC_FALSE;
2684   B->assembled     = PETSC_FALSE;
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2689 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2690 
/*
  MatConvert_MPIBAIJ_MPIAdj - Converts an MPIBAIJ matrix to MATMPIADJ, keeping only the
  block nonzero structure (no values) and omitting diagonal entries, working in block units.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) {
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* First pass: row pointer array; each block row gets its diagonal-part plus
     off-diagonal-part counts, minus one if the row contains its diagonal block */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* Second pass: emit global block columns in sorted order — off-diagonal columns to
     the left of the owned range, then the local (diagonal-part) columns shifted by
     rstart, then the remaining off-diagonal columns */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal block */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* ii and jj are not freed here; per the MatCreateMPIAdj contract the MPIAdj matrix
     takes ownership of them — confirm against the MatCreateMPIAdj documentation */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}
2727 
2728 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2729 
2730 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2731 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts an MPIBAIJ matrix to MATMPIAIJ by converting its
  sequential diagonal and off-diagonal blocks via MatConvert_SeqBAIJ_SeqAIJ.

  NOTE(review): in the non-reuse path the input A is modified in place — it is
  disassembled (MatDisAssemble_MPIBAIJ, presumably so the off-diagonal block carries
  global column indices; confirm) and reassembled before returning.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target with matching parallel layout and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* convert directly into the existing sequential parts of B */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* replace B's freshly-preallocated parts with conversions of A's parts */
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    /* restore A to its assembled state after the disassembly above */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B)); /* A becomes the converted matrix; B is consumed */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
2774 
2775 /*MC
2776    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2777 
2778    Options Database Keys:
2779 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2780 . -mat_block_size <bs> - set the blocksize used to store the matrix
2781 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2782 - -mat_use_hash_table <fact> - set hash table factor
2783 
2784    Level: beginner
2785 
2786    Note:
    `MatSetOption`(A, `MAT_STRUCTURE_ONLY`, `PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2789 
.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2791 M*/
2792 
2793 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2794 
/*
  MatCreate_MPIBAIJ - Constructor registered for MATMPIBAIJ: allocates the Mat_MPIBAIJ
  data structure, installs the MATMPIBAIJ function table, initializes all fields to an
  empty (not yet preallocated) state, and registers the composed conversion/preallocation
  functions, then processes the hash-table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) {
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  /* install the MATMPIBAIJ virtual function table */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific operations looked up by name via PetscTryMethod/PetscUseMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  /* optionally enable the hash-table assembly path from the options database */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39; /* default oversize factor; values <= 1.0 are replaced by it below */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2873 
2874 /*MC
2875    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2876 
2877    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2878    and `MATMPIBAIJ` otherwise.
2879 
2880    Options Database Keys:
2881 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2882 
2883   Level: beginner
2884 
2885 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2886 M*/
2887 
2888 /*@C
2889    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2890    (block compressed row).  For good matrix assembly performance
2891    the user should preallocate the matrix storage by setting the parameters
2892    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2893    performance can be increased by more than a factor of 50.
2894 
2895    Collective on Mat
2896 
2897    Input Parameters:
2898 +  B - the matrix
2899 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2900           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2901 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2902            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2907 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2908            submatrix (same for all local rows).
2909 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2910            off-diagonal portion of the local submatrix (possibly different for
2911            each block row) or NULL.
2912 
2913    If the *_nnz parameter is given then the *_nz parameter is ignored
2914 
2915    Options Database Keys:
2916 +   -mat_block_size - size of the blocks to use
2917 -   -mat_use_hash_table <fact> - set hash table factor
2918 
2919    Notes:
   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2922 
2923    Storage Information:
2924    For a square global matrix we define each processor's diagonal portion
2925    to be its local rows and the corresponding columns (a square submatrix);
2926    each processor's off-diagonal portion encompasses the remainder of the
2927    local matrix (a rectangular submatrix).
2928 
2929    The user can specify preallocated storage for the diagonal part of
2930    the local submatrix with either d_nz or d_nnz (not both).  Set
2931    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2932    memory allocation.  Likewise, specify preallocated storage for the
2933    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2934 
2935    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2936    the figure below we depict these three local rows and all columns (0-11).
2937 
2938 .vb
2939            0 1 2 3 4 5 6 7 8 9 10 11
2940           --------------------------
2941    row 3  |o o o d d d o o o o  o  o
2942    row 4  |o o o d d d o o o o  o  o
2943    row 5  |o o o d d d o o o o  o  o
2944           --------------------------
2945 .ve
2946 
2947    Thus, any entries in the d locations are stored in the d (diagonal)
2948    submatrix, and any entries in the o locations are stored in the
2949    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2950    stored simply in the `MATSEQBAIJ` format for compressed row storage.
2951 
2952    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2953    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2954    In general, for PDE problems in which most nonzeros are near the diagonal,
2955    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2956    or you will get TERRIBLE performance; see the users' manual chapter on
2957    matrices.
2958 
2959    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2960    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2961    You can also run with the option -info and look for messages with the string
2962    malloc in them to see if additional memory allocation was needed.
2963 
2964    Level: intermediate
2965 
2966 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
2967 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  /* Validate the matrix object and ensure bs is the same on all ranks (collective argument) */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the type-specific implementation; PetscTryMethod is a no-op
     for matrix types that do not compose "MatMPIBAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
2976 
2977 /*@C
2978    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
2979    (block compressed row).  For good matrix assembly performance
2980    the user should preallocate the matrix storage by setting the parameters
2981    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2982    performance can be increased by more than a factor of 50.
2983 
2984    Collective
2985 
2986    Input Parameters:
2987 +  comm - MPI communicator
2988 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2989           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2990 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2991            This value should be the same as the local size used in creating the
2992            y vector for the matrix-vector product y = Ax.
2993 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2994            This value should be the same as the local size used in creating the
2995            x vector for the matrix-vector product y = Ax.
2996 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2997 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2998 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2999            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3004 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3005            submatrix (same for all local rows).
3006 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3007            off-diagonal portion of the local submatrix (possibly different for
3008            each block row) or NULL.
3009 
3010    Output Parameter:
3011 .  A - the matrix
3012 
3013    Options Database Keys:
3014 +   -mat_block_size - size of the blocks to use
3015 -   -mat_use_hash_table <fact> - set hash table factor
3016 
3017    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3018    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3019    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3020 
3021    Notes:
3022    If the *_nnz parameter is given then the *_nz parameter is ignored
3023 
   A nonzero block is any block that has 1 or more nonzeros in it
3025 
3026    The user MUST specify either the local or global matrix dimensions
3027    (possibly both).
3028 
   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3031 
3032    Storage Information:
3033    For a square global matrix we define each processor's diagonal portion
3034    to be its local rows and the corresponding columns (a square submatrix);
3035    each processor's off-diagonal portion encompasses the remainder of the
3036    local matrix (a rectangular submatrix).
3037 
3038    The user can specify preallocated storage for the diagonal part of
3039    the local submatrix with either d_nz or d_nnz (not both).  Set
3040    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3041    memory allocation.  Likewise, specify preallocated storage for the
3042    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3043 
3044    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3045    the figure below we depict these three local rows and all columns (0-11).
3046 
3047 .vb
3048            0 1 2 3 4 5 6 7 8 9 10 11
3049           --------------------------
3050    row 3  |o o o d d d o o o o  o  o
3051    row 4  |o o o d d d o o o o  o  o
3052    row 5  |o o o d d d o o o o  o  o
3053           --------------------------
3054 .ve
3055 
3056    Thus, any entries in the d locations are stored in the d (diagonal)
3057    submatrix, and any entries in the o locations are stored in the
3058    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3059    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3060 
3061    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3062    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3063    In general, for PDE problems in which most nonzeros are near the diagonal,
3064    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3065    or you will get TERRIBLE performance; see the users' manual chapter on
3066    matrices.
3067 
3068    Level: intermediate
3069 
3070 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3071 @*/
3072 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
3073   PetscMPIInt size;
3074 
3075   PetscFunctionBegin;
3076   PetscCall(MatCreate(comm, A));
3077   PetscCall(MatSetSizes(*A, m, n, M, N));
3078   PetscCallMPI(MPI_Comm_size(comm, &size));
3079   if (size > 1) {
3080     PetscCall(MatSetType(*A, MATMPIBAIJ));
3081     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3082   } else {
3083     PetscCall(MatSetType(*A, MATSEQBAIJ));
3084     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3085   }
3086   PetscFunctionReturn(0);
3087 }
3088 
/* Duplicate an MPIBAIJ matrix: creates a new matrix of the same layout/type and
   copies (or not, per cpvalues) the diagonal and off-diagonal sequential blocks. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate is presented as already preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (local/global dimensions in blocks) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object scratch state is not copied; it is recreated on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration flags but reset the table itself */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* copy the block-row ownership ranges and, if present, the colmap
     (global block column -> local off-diagonal block column translation) */
  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* copy garray: the global block-column indices of the off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* fresh stash for off-process insertions; duplicate the communication setup */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* duplicate the diagonal (A) and off-diagonal (B) sequential sub-matrices */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3163 
3164 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Load a matrix stored in PETSc binary (AIJ-style CSR) format into an MPIBAIJ
   or MPISBAIJ matrix: reads the header, sets up layouts, counts block nonzeros
   to preallocate, then inserts the values row by row.
   Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nonzero count marks a special on-disk layout this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (for square matrices, mirror the other dimension) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local block rows */
  nbs = n / bs; /* local block columns */

  /* read in row lengths and build row indices (prefix-sum into CSR row pointers) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: local row lengths must sum to the file's global nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count distinct nonzero block columns once */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* sbaij stores only the upper triangle */
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls dispatch via PetscTryMethod, so presumably only the one
       matching the actual matrix type (BAIJ vs SBAIJ) takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3264 
3265 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) {
3266   PetscBool isbinary;
3267 
3268   PetscFunctionBegin;
3269   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3270   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3271   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 /*@
3276    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3277 
3278    Input Parameters:
3279 +  mat  - the matrix
3280 -  fact - factor
3281 
3282    Options Database Key:
3283 .  -mat_use_hash_table <fact> - provide the factor
3284 
3285    Level: advanced
3286 
3287 .seealso: `MATMPIBAIJ`, `MatSetOption()`
3288 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) {
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation; a no-op for matrix types
     that do not compose "MatSetHashTableFactor_C" */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}
3294 
3295 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) {
3296   Mat_MPIBAIJ *baij;
3297 
3298   PetscFunctionBegin;
3299   baij          = (Mat_MPIBAIJ *)mat->data;
3300   baij->ht_fact = fact;
3301   PetscFunctionReturn(0);
3302 }
3303 
3304 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
3305   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3306   PetscBool    flg;
3307 
3308   PetscFunctionBegin;
3309   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3310   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3311   if (Ad) *Ad = a->A;
3312   if (Ao) *Ao = a->B;
3313   if (colmap) *colmap = a->garray;
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 /*
3318     Special version for direct calls from Fortran (to eliminate two function call overheads
3319 */
3320 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3321 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3322 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3323 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3324 #endif
3325 
3326 /*@C
3327   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3328 
3329   Collective on Mat
3330 
3331   Input Parameters:
3332 + mat - the matrix
3333 . min - number of input rows
3334 . im - input rows
3335 . nin - number of input columns
3336 . in - input columns
3337 . v - numerical values input
3338 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3339 
3340   Developer Note:
3341     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3342 
3343   Level: advanced
3344 
3345 .seealso: `MatSetValuesBlocked()`
3346 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) {
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block used when a copy of the input is needed */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) from the end of one block row to the start of the next within v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the scattered bs x bs block into contiguous scratch storage */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently skipped */
        } else {
          /* off-diagonal portion: translate global to local block column */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not in colmap: disassemble so new off-process columns can be inserted */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another process: stash values for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}
3453 
3454 /*@
3455      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3456          CSR format the local rows.
3457 
3458    Collective
3459 
3460    Input Parameters:
3461 +  comm - MPI communicator
3462 .  bs - the block size, only a block size of 1 is supported
3463 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3464 .  n - This value should be the same as the local size used in creating the
3465        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3466        calculated if N is given) For square matrices n is almost always m.
3467 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3468 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3469 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3470 .   j - column indices
3471 -   a - matrix values
3472 
3473    Output Parameter:
3474 .   mat - the matrix
3475 
3476    Level: intermediate
3477 
3478    Notes:
3479        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3480      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3481      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3482 
3483      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3484      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3485      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3486      with column-major ordering within blocks.
3487 
3488        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3489 
3490 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3491           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3492 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* block values arrive in column-major order within each block, so switch to
     column-oriented insertion while copying the CSR data, then restore */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3507 
3508 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3509   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3510   PetscInt    *indx;
3511   PetscScalar *values;
3512 
3513   PetscFunctionBegin;
3514   PetscCall(MatGetSize(inmat, &m, &N));
3515   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3516     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3517     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3518     PetscInt    *bindx, rmax = a->rmax, j;
3519     PetscMPIInt  rank, size;
3520 
3521     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3522     mbs = m / bs;
3523     Nbs = N / cbs;
3524     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3525     nbs = n / cbs;
3526 
3527     PetscCall(PetscMalloc1(rmax, &bindx));
3528     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3529 
3530     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3531     PetscCallMPI(MPI_Comm_rank(comm, &size));
3532     if (rank == size - 1) {
3533       /* Check sum(nbs) = Nbs */
3534       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3535     }
3536 
3537     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3538     for (i = 0; i < mbs; i++) {
3539       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3540       nnz = nnz / bs;
3541       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3542       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3543       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3544     }
3545     PetscCall(PetscFree(bindx));
3546 
3547     PetscCall(MatCreate(comm, outmat));
3548     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3549     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3550     PetscCall(MatSetType(*outmat, MATBAIJ));
3551     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3552     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3553     MatPreallocateEnd(dnz, onz);
3554     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3555   }
3556 
3557   /* numeric phase */
3558   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3559   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3560 
3561   for (i = 0; i < m; i++) {
3562     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3563     Ii = i + rstart;
3564     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3565     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3566   }
3567   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3568   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3569   PetscFunctionReturn(0);
3570 }
3571