xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 458b0db5a59538d1ac4b30bd659cdfc3dbce12fa)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
/*
  MatGetRowMaxAbs_MPIBAIJ - for every local row, compute the entry of largest magnitude and,
  optionally, its global column index.

  Input:  A   - the MPIBAIJ matrix
  Output: v   - vector (length A->rmap->n) holding the max-magnitude entry of each local row
          idx - optional array receiving that entry's global column; on a magnitude tie between
                the diagonal (A) and off-diagonal (B) parts, the smaller global column wins
*/
static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  /* row-wise max of the diagonal block; idx receives column indices local to a->A */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* shift the diagonal-block column indices into global numbering */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* row-wise max of the off-diagonal block, with its own local column indices in idxb */
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* off-diagonal part wins; map B's local block column through garray to a global column */
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      /* on an exact tie prefer the smaller global column; idxb[i] == -1 appears to flag an
         empty row of B (no candidate column) -- see the tie check below */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
/*
  Builds baij->colmap: for each global block column present in the off-diagonal block B,
  it records i*bs + 1, where i is that column's local block index in B (so a value of 0,
  or a missing hash key, means "block column not present").
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global block column + 1 (keys must be nonzero) */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* non-scalable variant: dense array over all Nbs global block columns, zeroed so absent columns map to 0 */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
159 
/*
  MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) -
  insert or add ONE scalar at local point position (row, col) of the diagonal block A.
  Expands inside MatSetValues_MPIBAIJ() and relies on its locals (a, aa, ai, aj, aimax,
  ailen, bs, bs2, rp, ap, rmax, nrow, low, high, t, _i, N, brow, bcol, ridx, cidx, bap).
  A short binary search narrows to the target block column; if the block is absent and new
  nonzeros are allowed, the row is grown via MatSeqXAIJReallocateAIJ() and later entries
  are shifted up.  orow/ocol are the original global indices, used only in error messages.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
199 
/*
  MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) -
  twin of MatSetValues_SeqBAIJ_A_Private() operating on the off-diagonal block B
  (locals b, ba, bi, bj, bimax, bilen of MatSetValues_MPIBAIJ()).  Here col is already
  translated into B's local point numbering by the caller.  orow/ocol are the original
  global indices, used only in error messages.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
239 
/*
  MatSetValues_MPIBAIJ - point-indexed MatSetValues() for MPIBAIJ.
  Locally owned rows are routed to the diagonal block A (local columns) or the
  off-diagonal block B (remote columns, translated through colmap); rows owned by
  other processes are placed in the stash for communication at assembly time.
  im[]/in[] are GLOBAL point (not block) indices; negative indices are ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* locally owned column: goes into the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* remote column: goes into the off-diagonal block B */
          if (mat->was_assembled) {
            /* translate the global point column to B's compressed local numbering via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* block column not yet present in B: fall back to the unassembled (global) layout */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash the whole row of values for communication at assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
/*
    MatSetValuesBlocked_MPIBAIJ - block-indexed MatSetValuesBlocked() for MPIBAIJ.
    im[]/in[] are GLOBAL BLOCK indices; each (i,j) pair names a bs x bs block in v.

    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocated bs2-sized scratch buffer, kept on the matrix for reuse */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          /* block i of v is already contiguous; alias it instead of copying */
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* locally owned block column: diagonal block A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          /* remote block column: off-diagonal block B, translated through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* block column not yet in B: revert to the unassembled (global) layout */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for communication at assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 
/* Multiplicative hash (golden-ratio fraction): maps an integer key into [0, size).
   tmp is a PetscReal scratch variable supplied by the caller to avoid double evaluation. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
  MatSetValues_MPIBAIJ_HT - point-indexed MatSetValues() using the precomputed hash table:
  baij->ht maps a key derived from the (block row, block col) pair to an index whose slot in
  baij->hd points at that block's values.  Collisions are resolved by linear probing with
  wrap-around; only existing blocks can be written (a missing key is an error).
  Off-process rows are stashed as in MatSetValues_MPIBAIJ().
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table: key encodes the global (block row, block col) pair */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant additionally counts probes for hash-efficiency statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, wrapping to the start of the table if needed */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
/*
  MatGetValues_MPIBAIJ - point-indexed MatGetValues(); only rows owned by this process may be
  queried.  Local columns are read from the diagonal block A; remote columns are translated
  through colmap and read from the off-diagonal block B, returning 0.0 for absent entries.
  Negative row/column indices are skipped.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        /* locally owned column: read from the diagonal block */
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* remote column: translate global block column to B's local numbering via colmap */
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        /* data < 0 means the block column is absent; the garray cross-check guards against a
           stale colmap entry pointing at a different global block column */
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
688 
689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
690 {
691   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
692   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
693   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
694   PetscReal    sum = 0.0;
695   MatScalar   *v;
696 
697   PetscFunctionBegin;
698   if (baij->size == 1) {
699     PetscCall(MatNorm(baij->A, type, nrm));
700   } else {
701     if (type == NORM_FROBENIUS) {
702       v  = amat->a;
703       nz = amat->nz * bs2;
704       for (i = 0; i < nz; i++) {
705         sum += PetscRealPart(PetscConj(*v) * (*v));
706         v++;
707       }
708       v  = bmat->a;
709       nz = bmat->nz * bs2;
710       for (i = 0; i < nz; i++) {
711         sum += PetscRealPart(PetscConj(*v) * (*v));
712         v++;
713       }
714       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
715       *nrm = PetscSqrtReal(*nrm);
716     } else if (type == NORM_1) { /* max column sum */
717       PetscReal *tmp;
718       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
719 
720       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
721       v  = amat->a;
722       jj = amat->j;
723       for (i = 0; i < amat->nz; i++) {
724         for (j = 0; j < bs; j++) {
725           col = bs * (cstart + *jj) + j; /* column index */
726           for (row = 0; row < bs; row++) {
727             tmp[col] += PetscAbsScalar(*v);
728             v++;
729           }
730         }
731         jj++;
732       }
733       v  = bmat->a;
734       jj = bmat->j;
735       for (i = 0; i < bmat->nz; i++) {
736         for (j = 0; j < bs; j++) {
737           col = bs * garray[*jj] + j;
738           for (row = 0; row < bs; row++) {
739             tmp[col] += PetscAbsScalar(*v);
740             v++;
741           }
742         }
743         jj++;
744       }
745       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
746       *nrm = 0.0;
747       for (j = 0; j < mat->cmap->N; j++) {
748         if (tmp[j] > *nrm) *nrm = tmp[j];
749       }
750       PetscCall(PetscFree(tmp));
751     } else if (type == NORM_INFINITY) { /* max row sum */
752       PetscReal *sums;
753       PetscCall(PetscMalloc1(bs, &sums));
754       sum = 0.0;
755       for (j = 0; j < amat->mbs; j++) {
756         for (row = 0; row < bs; row++) sums[row] = 0.0;
757         v  = amat->a + bs2 * amat->i[j];
758         nz = amat->i[j + 1] - amat->i[j];
759         for (i = 0; i < nz; i++) {
760           for (col = 0; col < bs; col++) {
761             for (row = 0; row < bs; row++) {
762               sums[row] += PetscAbsScalar(*v);
763               v++;
764             }
765           }
766         }
767         v  = bmat->a + bs2 * bmat->i[j];
768         nz = bmat->i[j + 1] - bmat->i[j];
769         for (i = 0; i < nz; i++) {
770           for (col = 0; col < bs; col++) {
771             for (row = 0; row < bs; row++) {
772               sums[row] += PetscAbsScalar(*v);
773               v++;
774             }
775           }
776         }
777         for (row = 0; row < bs; row++) {
778           if (sums[row] > sum) sum = sums[row];
779         }
780       }
781       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
782       PetscCall(PetscFree(sums));
783     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
784   }
785   PetscFunctionReturn(PETSC_SUCCESS);
786 }
787 
788 /*
789   Creates the hash table, and sets the table
790   This table is created only once.
791   If new entries need to be added to the matrix
792   then the hash table has to be destroyed and
793   recreated.
794 */
795 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
796 {
797   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
798   Mat          A = baij->A, B = baij->B;
799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
800   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
801   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
802   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
803   PetscInt    *HT, key;
804   MatScalar  **HD;
805   PetscReal    tmp;
806 #if defined(PETSC_USE_INFO)
807   PetscInt ct = 0, max = 0;
808 #endif
809 
810   PetscFunctionBegin;
811   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
812 
813   baij->ht_size = (PetscInt)(factor * nz);
814   ht_size       = baij->ht_size;
815 
816   /* Allocate Memory for Hash Table */
817   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
818   HD = baij->hd;
819   HT = baij->ht;
820 
821   /* Loop Over A */
822   for (i = 0; i < a->mbs; i++) {
823     for (j = ai[i]; j < ai[i + 1]; j++) {
824       row = i + rstart;
825       col = aj[j] + cstart;
826 
827       key = row * Nbs + col + 1;
828       h1  = HASH(ht_size, key, tmp);
829       for (k = 0; k < ht_size; k++) {
830         if (!HT[(h1 + k) % ht_size]) {
831           HT[(h1 + k) % ht_size] = key;
832           HD[(h1 + k) % ht_size] = a->a + j * bs2;
833           break;
834 #if defined(PETSC_USE_INFO)
835         } else {
836           ct++;
837 #endif
838         }
839       }
840 #if defined(PETSC_USE_INFO)
841       if (k > max) max = k;
842 #endif
843     }
844   }
845   /* Loop Over B */
846   for (i = 0; i < b->mbs; i++) {
847     for (j = bi[i]; j < bi[i + 1]; j++) {
848       row = i + rstart;
849       col = garray[bj[j]];
850       key = row * Nbs + col + 1;
851       h1  = HASH(ht_size, key, tmp);
852       for (k = 0; k < ht_size; k++) {
853         if (!HT[(h1 + k) % ht_size]) {
854           HT[(h1 + k) % ht_size] = key;
855           HD[(h1 + k) % ht_size] = b->a + j * bs2;
856           break;
857 #if defined(PETSC_USE_INFO)
858         } else {
859           ct++;
860 #endif
861         }
862       }
863 #if defined(PETSC_USE_INFO)
864       if (k > max) max = k;
865 #endif
866     }
867   }
868 
869   /* Print Summary */
870 #if defined(PETSC_USE_INFO)
871   for (i = 0, j = 0; i < ht_size; i++) {
872     if (HT[i]) j++;
873   }
874   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
875 #endif
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
880 {
881   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
882   PetscInt     nstash, reallocs;
883 
884   PetscFunctionBegin;
885   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
886 
887   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
888   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
889   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
890   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
891   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
892   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
/* Completes assembly: drains the scalar and block stashes into the local blocks,
   handles collective disassembly/reassembly, assembles A and B, and optionally
   switches to hash-table-based MatSetValues(). */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point (scalar) stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 values, hence the i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly, optionally switch to hash-table-based insertion */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the scratch buffers that MatGetRow_MPIBAIJ() may have allocated */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1002 
1003 #include <petscdraw.h>
/* ASCII/draw/socket viewing. The info-style ASCII formats are handled with
   per-rank summaries and an early return; otherwise the whole matrix is
   gathered onto rank 0 as a temporary MPIBAIJ matrix and viewed there. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary of local sizes, nonzero counts, and the scatter context */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-matrix info */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals)); /* the bs global scalar row indices of one block row */

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs; /* global scalar column of the block's first column */
        for (k = 0; k < bs; k++) {
          /* one column of the bs x bs block at a time; the bs values for a column are contiguous */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* off-diagonal columns translate to global through garray */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* matname is only read when the matrix has a name, so it was set above in that case */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Writes the matrix in PETSc's native binary format: a 4-entry header
   (classid, M, N, global nz), the per-scalar-row nonzero counts, the column
   indices, and finally the values -- all expanded from blocks to scalar entries,
   with each row's columns emitted in ascending global order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* number of locally stored scalar entries */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); /* only rank 0 writes the header, so only it needs the global count */
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); /* every scalar row of a block row has the same length */
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* three phases keep the columns globally sorted: B entries left of the
         diagonal block, then the diagonal block (A), then the remaining B entries */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same three-phase order as the column indices) */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) { /* k = scalar row within the block row */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; /* value (k,l) of block jb */
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1204 
1205 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1206 {
1207   PetscBool isascii, isdraw, issocket, isbinary;
1208 
1209   PetscFunctionBegin;
1210   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1211   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1212   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1213   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1214   if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1215   else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1216   PetscFunctionReturn(PETSC_SUCCESS);
1217 }
1218 
1219 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1220 {
1221   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1222   PetscInt     nt;
1223 
1224   PetscFunctionBegin;
1225   PetscCall(VecGetLocalSize(xx, &nt));
1226   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1227   PetscCall(VecGetLocalSize(yy, &nt));
1228   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1229   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1230   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1231   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1232   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1233   PetscFunctionReturn(PETSC_SUCCESS);
1234 }
1235 
1236 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1237 {
1238   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1239 
1240   PetscFunctionBegin;
1241   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1242   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1243   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1244   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1245   PetscFunctionReturn(PETSC_SUCCESS);
1246 }
1247 
1248 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1249 {
1250   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1251 
1252   PetscFunctionBegin;
1253   /* do nondiagonal part */
1254   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1255   /* do local part */
1256   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1257   /* add partial results together */
1258   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1259   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1260   PetscFunctionReturn(PETSC_SUCCESS);
1261 }
1262 
1263 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1264 {
1265   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1266 
1267   PetscFunctionBegin;
1268   /* do nondiagonal part */
1269   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1270   /* do local part */
1271   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1272   /* add partial results together */
1273   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1274   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 /*
1279   This only works correctly for square matrices where the subblock A->A is the
1280    diagonal block
1281 */
1282 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1283 {
1284   PetscFunctionBegin;
1285   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1286   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1287   PetscFunctionReturn(PETSC_SUCCESS);
1288 }
1289 
1290 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1291 {
1292   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1293 
1294   PetscFunctionBegin;
1295   PetscCall(MatScale(a->A, aa));
1296   PetscCall(MatScale(a->B, aa));
1297   PetscFunctionReturn(PETSC_SUCCESS);
1298 }
1299 
/* Returns one (scalar) row of the parallel matrix, merging the diagonal (A) and
   off-diagonal (B) contributions into ascending global column order. Only local
   rows may be requested, and only one row may be active at a time. */
static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* block count of row i over both parts */
      if (max < tmp) max = tmp;
    }
    /* these buffers are reused across calls; they are freed in MatAssemblyEnd_MPIBAIJ() */
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* request from the sequential blocks only what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* count of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark has not been computed yet: do it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1382 
1383 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1384 {
1385   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1386 
1387   PetscFunctionBegin;
1388   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1389   baij->getrowactive = PETSC_FALSE;
1390   PetscFunctionReturn(PETSC_SUCCESS);
1391 }
1392 
1393 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1394 {
1395   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1396 
1397   PetscFunctionBegin;
1398   PetscCall(MatZeroEntries(l->A));
1399   PetscCall(MatZeroEntries(l->B));
1400   PetscFunctionReturn(PETSC_SUCCESS);
1401 }
1402 
1403 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1404 {
1405   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1406   Mat            A = a->A, B = a->B;
1407   PetscLogDouble isend[5], irecv[5];
1408 
1409   PetscFunctionBegin;
1410   info->block_size = (PetscReal)matin->rmap->bs;
1411 
1412   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1413 
1414   isend[0] = info->nz_used;
1415   isend[1] = info->nz_allocated;
1416   isend[2] = info->nz_unneeded;
1417   isend[3] = info->memory;
1418   isend[4] = info->mallocs;
1419 
1420   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1421 
1422   isend[0] += info->nz_used;
1423   isend[1] += info->nz_allocated;
1424   isend[2] += info->nz_unneeded;
1425   isend[3] += info->memory;
1426   isend[4] += info->mallocs;
1427 
1428   if (flag == MAT_LOCAL) {
1429     info->nz_used      = isend[0];
1430     info->nz_allocated = isend[1];
1431     info->nz_unneeded  = isend[2];
1432     info->memory       = isend[3];
1433     info->mallocs      = isend[4];
1434   } else if (flag == MAT_GLOBAL_MAX) {
1435     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1436 
1437     info->nz_used      = irecv[0];
1438     info->nz_allocated = irecv[1];
1439     info->nz_unneeded  = irecv[2];
1440     info->memory       = irecv[3];
1441     info->mallocs      = irecv[4];
1442   } else if (flag == MAT_GLOBAL_SUM) {
1443     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1451   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1452   info->fill_ratio_needed = 0;
1453   info->factor_mallocs    = 0;
1454   PetscFunctionReturn(PETSC_SUCCESS);
1455 }
1456 
1457 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1458 {
1459   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1460 
1461   PetscFunctionBegin;
1462   switch (op) {
1463   case MAT_NEW_NONZERO_LOCATIONS:
1464   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1465   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1466   case MAT_KEEP_NONZERO_PATTERN:
1467   case MAT_NEW_NONZERO_LOCATION_ERR:
1468     MatCheckPreallocated(A, 1);
1469     PetscCall(MatSetOption(a->A, op, flg));
1470     PetscCall(MatSetOption(a->B, op, flg));
1471     break;
1472   case MAT_ROW_ORIENTED:
1473     MatCheckPreallocated(A, 1);
1474     a->roworiented = flg;
1475 
1476     PetscCall(MatSetOption(a->A, op, flg));
1477     PetscCall(MatSetOption(a->B, op, flg));
1478     break;
1479   case MAT_IGNORE_OFF_PROC_ENTRIES:
1480     a->donotstash = flg;
1481     break;
1482   case MAT_USE_HASH_TABLE:
1483     a->ht_flag = flg;
1484     a->ht_fact = 1.39;
1485     break;
1486   case MAT_SYMMETRIC:
1487   case MAT_STRUCTURALLY_SYMMETRIC:
1488   case MAT_HERMITIAN:
1489   case MAT_SYMMETRY_ETERNAL:
1490   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1491   case MAT_SPD_ETERNAL:
1492     /* if the diagonal matrix is square it inherits some of the properties above */
1493     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1494     break;
1495   default:
1496     break;
1497   }
1498   PetscFunctionReturn(PETSC_SUCCESS);
1499 }
1500 
/* Transpose a parallel BAIJ matrix by walking the raw CSR-of-blocks storage of the
   local diagonal (A) and off-diagonal (B) parts and inserting each bs x bs block,
   column-by-column, into the (possibly new) result matrix B at transposed indices. */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with row/column sizes swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i; /* block-row pointers */
  aj   = Aloc->j; /* block-column indices (local, diagonal-part numbering) */
  a    = Aloc->a; /* block values, bs*bs scalars per block */
  /* rvals holds the bs consecutive global (point) row indices of block row i */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global (point) column index of the first column of this block */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one column of the block as one row of the transpose;
           a advances by bs because blocks are stored column-major */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* off-diagonal columns are compressed: garray maps them to global block columns */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX, replace A's contents with B's and destroy the shell */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1567 
/* Diagonally scale mat as ll * mat * rr (either vector may be NULL).
   The right-scaling of the off-diagonal block needs ghost values of rr, so the
   scatter is started first and completed only after the local scaling is done,
   overlapping communication with computation. */
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block; rows are local so no communication needed */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block with both vectors (both index sets are local here) */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1597 
/* Zero the given global rows of A, optionally placing diag on the diagonal and
   fixing up b so that x remains a solution for the zeroed rows (b = diag*x there). */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent layouts: the diagonal entry lives in l->A, set it there directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries via MatSetValues()
       (which may create new nonzeros, hence the nonew check below) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1654 
/* Zero both the rows AND columns listed in rows[] (global indices), optionally
   placing diag on the diagonal and adjusting b for the known values x of the
   eliminated unknowns. Uses a PetscSF to route each requested row to its owner,
   then a scattered 0/1 mask vector to find which ghost columns must be zeroed
   in the off-diagonal block. */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: after the reduce, lrows[r] >= 0 iff some
     process requested local row r (rows[] entries are nonnegative, initial is -1) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a mask vector with 1 in every zeroed row, and scatter it so each process
     sees which of its ghost columns correspond to zeroed rows elsewhere */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of scalar entries in the point row = (blocks in block row) * bs;
       entries of point row (row % bs) are strided bs apart in column-major blocks */
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = baij->a + j * bs2 + (i % bs) + bs * k;
          /* move the known contribution of the eliminated unknown to the rhs */
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1751 
1752 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1753 {
1754   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1755 
1756   PetscFunctionBegin;
1757   PetscCall(MatSetUnfactored(a->A));
1758   PetscFunctionReturn(PETSC_SUCCESS);
1759 }
1760 
1761 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1762 
1763 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1764 {
1765   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1766   Mat          a, b, c, d;
1767   PetscBool    flg;
1768 
1769   PetscFunctionBegin;
1770   a = matA->A;
1771   b = matA->B;
1772   c = matB->A;
1773   d = matB->B;
1774 
1775   PetscCall(MatEqual(a, c, &flg));
1776   if (flg) PetscCall(MatEqual(b, d, &flg));
1777   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1778   PetscFunctionReturn(PETSC_SUCCESS);
1779 }
1780 
1781 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1782 {
1783   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1784   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1785 
1786   PetscFunctionBegin;
1787   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1788   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1789     PetscCall(MatCopy_Basic(A, B, str));
1790   } else {
1791     PetscCall(MatCopy(a->A, b->A, str));
1792     PetscCall(MatCopy(a->B, b->B, str));
1793   }
1794   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1795   PetscFunctionReturn(PETSC_SUCCESS);
1796 }
1797 
1798 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1799 {
1800   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1801   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1802   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1803 
1804   PetscFunctionBegin;
1805   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
/* Compute Y += a*X. With SAME_NONZERO_PATTERN a single BLAS axpy per local block
   suffices; with SUBSET_NONZERO_PATTERN the generic routine is used; otherwise a
   new matrix with the union pattern is preallocated and merged back into Y. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts blocks, so the scalar length is nz * bs^2 */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: preallocate a fresh matrix B for the union pattern,
       accumulate into it, then replace Y's contents with B's (MatHeaderMerge) */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1851 
1852 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1853 {
1854   PetscFunctionBegin;
1855   if (PetscDefined(USE_COMPLEX)) {
1856     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1857 
1858     PetscCall(MatConjugate_SeqBAIJ(a->A));
1859     PetscCall(MatConjugate_SeqBAIJ(a->B));
1860   }
1861   PetscFunctionReturn(PETSC_SUCCESS);
1862 }
1863 
1864 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1865 {
1866   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1867 
1868   PetscFunctionBegin;
1869   PetscCall(MatRealPart(a->A));
1870   PetscCall(MatRealPart(a->B));
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1875 {
1876   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(MatImaginaryPart(a->A));
1880   PetscCall(MatImaginaryPart(a->B));
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1885 {
1886   IS       iscol_local;
1887   PetscInt csize;
1888 
1889   PetscFunctionBegin;
1890   PetscCall(ISGetLocalSize(iscol, &csize));
1891   if (call == MAT_REUSE_MATRIX) {
1892     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1893     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1894   } else {
1895     PetscCall(ISAllGather(iscol, &iscol_local));
1896   }
1897   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1898   if (call == MAT_INITIAL_MATRIX) {
1899     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1900     PetscCall(ISDestroy(&iscol_local));
1901   }
1902   PetscFunctionReturn(PETSC_SUCCESS);
1903 }
1904 
1905 /*
1906   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1907   in local and then by concatenating the local matrices the end result.
1908   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1909   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1910 */
/* Extract mat(isrow, iscol) into *newmat. csize is the requested local column
   count (or PETSC_DECIDE); sym selects the SBAIJ path (result keeps mat's type).
   Works by gathering a sequential copy of the requested rows/columns on each
   process (Mreuse), then assembling the distributed result from it. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It doesn't point
     out errors and might change the indices, hence potentially buggy. */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    /* same index set for rows and columns: share the compressed IS */
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix cached during the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* block columns inside [rstart,rend) belong to the diagonal part */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  /* copy the gathered sequential submatrix into the distributed result,
     one block row at a time */
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2039 
/* Permute the rows and columns of A: *B = A(rowp, colp). The row permutation is
   lifted to A's communicator if needed and the column permutation is gathered
   locally, then the permuted matrix is obtained as a submatrix extraction. */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    /* rowp lives on a different communicator: recreate it on A's communicator */
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    /* gather the full column permutation onto every process */
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up only what this routine created (conditions mirror the branches above) */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2078 
2079 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2080 {
2081   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2082   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2083 
2084   PetscFunctionBegin;
2085   if (nghosts) *nghosts = B->nbs;
2086   if (ghosts) *ghosts = baij->garray;
2087   PetscFunctionReturn(PETSC_SUCCESS);
2088 }
2089 
/* Build on every process a sequential AIJ matrix holding the full (block-level)
   nonzero structure of the parallel matrix A; numerical values are not set.
   Row lengths and column indices are replicated with MPI_Allgatherv, each
   process filling its own slice of the shared arrays in place. */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  /* lens[i] = nonzero blocks in global block row i (diagonal + off-diagonal parts) */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation holds both recvcounts and displs (displs = second half) */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  /* MPI_IN_PLACE: each process already wrote its slice of lens above */
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  /* write directly into B's j array, at the offset for this process's rows,
     merging diagonal and off-diagonal indices in ascending global order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  /* recvcounts[i] = total nonzeros owned by process i (sum of its row lengths) */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2169 
/* Block SOR/relaxation for a parallel BAIJ matrix. Only "local" variants are
   supported: each iteration updates the rhs with the off-process coupling
   (bb1 = bb - B*x, using scattered ghost values of x) and then runs the local
   sequential SOR sweep on the diagonal block. */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless a single iteration starts from a zero guess;
     note ~ binds tighter than &, so this tests that the ZERO_INITIAL_GUESS bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-process part contributes nothing on the first sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2237 
/*
  MatGetColumnReductions_MPIBAIJ - Computes a reduction (1/2/infinity norm, sum, or mean of the
  real or imaginary parts) of each global column of the matrix.

  Local contributions are accumulated into a work array of global length N and then combined
  across ranks with an MPI reduction (MAX for the infinity norm, SUM otherwise), so every rank
  receives the complete result in `reductions`.

  Traversal: the diagonal (aij->A) and off-diagonal (aij->B) sequential BAIJ parts are walked
  block by block.  Within each bs x bs block, `jb` indexes the block column while the value
  pointer (a_val / b_val) advances through the stored entries in order, so each value is added
  to the work slot of its global column.  For the off-diagonal part, the compressed block
  column index is mapped to a global block column through `garray`.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  /* work has one slot per GLOBAL column; zero-initialized by PetscCalloc1 */
  PetscCall(PetscCalloc1(N, &work));
  if (type == NORM_2) {
    /* accumulate sum of squares per column; note |x*x| == |x|^2 also for complex scalars */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    /* accumulate sum of absolute values per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    /* track the maximum absolute value per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    /* accumulate sum of real parts per column (mean is formed after the allreduce) */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* accumulate sum of imaginary parts per column (mean is formed after the allreduce) */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine local contributions: MAX for the infinity norm, SUM for everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* we accumulated squares, so take the square root to obtain the 2-norm */
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide the column sums by the global number of rows to obtain the mean */
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2353 
2354 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2355 {
2356   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2357 
2358   PetscFunctionBegin;
2359   PetscCall(MatInvertBlockDiagonal(a->A, values));
2360   A->factorerrortype             = a->A->factorerrortype;
2361   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2362   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2363   PetscFunctionReturn(PETSC_SUCCESS);
2364 }
2365 
2366 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2367 {
2368   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2369   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2370 
2371   PetscFunctionBegin;
2372   if (!Y->preallocated) {
2373     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2374   } else if (!aij->nz) {
2375     PetscInt nonew = aij->nonew;
2376     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2377     aij->nonew = nonew;
2378   }
2379   PetscCall(MatShift_Basic(Y, a));
2380   PetscFunctionReturn(PETSC_SUCCESS);
2381 }
2382 
2383 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2384 {
2385   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2386 
2387   PetscFunctionBegin;
2388   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2389   PetscCall(MatMissingDiagonal(a->A, missing, d));
2390   if (d) {
2391     PetscInt rstart;
2392     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2393     *d += rstart / A->rmap->bs;
2394   }
2395   PetscFunctionReturn(PETSC_SUCCESS);
2396 }
2397 
2398 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2399 {
2400   PetscFunctionBegin;
2401   *a = ((Mat_MPIBAIJ *)A->data)->A;
2402   PetscFunctionReturn(PETSC_SUCCESS);
2403 }
2404 
2405 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2406 {
2407   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2408 
2409   PetscFunctionBegin;
2410   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2411   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2412   PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414 
/* Dispatch table for MATMPIBAIJ. The entries are POSITIONAL: each slot corresponds to a
   fixed operation index (the numeric comments mark every fifth slot), so entries must never
   be reordered; a NULL slot means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIBAIJ,
                                       /*69*/ NULL,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       /*74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       /*94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ MatMissingDiagonal_MPIBAIJ,
                                       MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ MatGetMultiProcBlock_MPIBAIJ,
                                       NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSetBlockSizes_Default,
                                       NULL,
                                       /*129*/ MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash,
                                       NULL};
2559 
2560 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2561 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2562 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Type-specific implementation backing
  MatMPIBAIJSetPreallocationCSR(): preallocates the matrix from a local block-CSR
  structure (ii, jj) and, when V is non-NULL, also inserts the numerical values.

  Two passes are made over the CSR data: the first counts, per block row, the block
  columns falling inside the diagonal range [cstart, cend) versus outside it, to build
  exact d_nnz/o_nnz preallocation arrays; the second inserts the (possibly zero) values
  with MatSetValuesBlocked_MPIBAIJ(). All indices are in units of blocks.
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may adjust the block size, so read back the final value */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;       /* local number of block rows */
  rstart = B->rmap->rstart / bs;  /* first global block row owned by this rank */
  cstart = B->cmap->rstart / bs;  /* diagonal block-column range: [cstart, cend) */
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* pass 1: split each block row's count into diagonal/off-diagonal parts */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* pass 2: insert values; when V is absent use a zero scratch block row */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all rows inserted above are locally owned, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact: error on any future insertion outside it */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2631 
2632 /*@C
2633   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2634 
2635   Collective
2636 
2637   Input Parameters:
2638 + B  - the matrix
2639 . bs - the block size
2640 . i  - the indices into `j` for the start of each local row (starts with zero)
2641 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2642 - v  - optional values in the matrix, use `NULL` if not provided
2643 
2644   Level: advanced
2645 
2646   Notes:
2647   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2648   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2649   called this routine.
2650 
2651   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2652   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2653   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2654   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2655   block column and the second index is over columns within a block.
2656 
2657   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2658 
2659 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2660 @*/
/* Public entry point; validates arguments and dispatches to the type-specific
   implementation (registered as "MatMPIBAIJSetPreallocationCSR_C") if one exists. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must agree across all ranks */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2670 
2671 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2672 {
2673   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2674   PetscInt     i;
2675   PetscMPIInt  size;
2676 
2677   PetscFunctionBegin;
2678   if (B->hash_active) {
2679     B->ops[0]      = b->cops;
2680     B->hash_active = PETSC_FALSE;
2681   }
2682   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2683   PetscCall(MatSetBlockSize(B, bs));
2684   PetscCall(PetscLayoutSetUp(B->rmap));
2685   PetscCall(PetscLayoutSetUp(B->cmap));
2686   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2687 
2688   if (d_nnz) {
2689     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2690   }
2691   if (o_nnz) {
2692     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2693   }
2694 
2695   b->bs2 = bs * bs;
2696   b->mbs = B->rmap->n / bs;
2697   b->nbs = B->cmap->n / bs;
2698   b->Mbs = B->rmap->N / bs;
2699   b->Nbs = B->cmap->N / bs;
2700 
2701   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2702   b->rstartbs = B->rmap->rstart / bs;
2703   b->rendbs   = B->rmap->rend / bs;
2704   b->cstartbs = B->cmap->rstart / bs;
2705   b->cendbs   = B->cmap->rend / bs;
2706 
2707 #if defined(PETSC_USE_CTABLE)
2708   PetscCall(PetscHMapIDestroy(&b->colmap));
2709 #else
2710   PetscCall(PetscFree(b->colmap));
2711 #endif
2712   PetscCall(PetscFree(b->garray));
2713   PetscCall(VecDestroy(&b->lvec));
2714   PetscCall(VecScatterDestroy(&b->Mvctx));
2715 
2716   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2717 
2718   MatSeqXAIJGetOptions_Private(b->B);
2719   PetscCall(MatDestroy(&b->B));
2720   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2721   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2722   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2723   MatSeqXAIJRestoreOptions_Private(b->B);
2724 
2725   MatSeqXAIJGetOptions_Private(b->A);
2726   PetscCall(MatDestroy(&b->A));
2727   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2728   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2729   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2730   MatSeqXAIJRestoreOptions_Private(b->A);
2731 
2732   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2733   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2734   B->preallocated  = PETSC_TRUE;
2735   B->was_assembled = PETSC_FALSE;
2736   B->assembled     = PETSC_FALSE;
2737   PetscFunctionReturn(PETSC_SUCCESS);
2738 }
2739 
2740 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2741 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2742 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency structure from the block
  nonzero pattern of a MATMPIBAIJ matrix.

  The graph is over block rows/columns (indices divided by the block size). Diagonal
  entries are removed, and each row's column list is assembled in ascending global order
  by merging: off-diagonal columns below the owned range, then the diagonal-part columns,
  then the remaining off-diagonal columns. Ownership of ii and jj passes to the MPIAdj
  matrix created at the end.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* row-offset array: total block count per row minus any diagonal entry */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that precede the owned range */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns (global index = rstart + local), skipping the diagonal entry */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns past the owned range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MPIAdj takes ownership of ii and jj; do not free them here */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2780 
2781 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2782 
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2784 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting
  the two sequential BAIJ parts to SeqAIJ.

  For a fresh conversion the off-diagonal part's column indices, which come out of the
  SeqBAIJ conversion in the compressed (local block) numbering, are rewritten to global
  point indices using garray. For MAT_INPLACE_MATRIX the new matrix replaces A's header.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target AIJ matrix with the same layout and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential parts of B as conversion targets */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      /* split the point column into (block column, offset within block) and remap the block */
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2840 
2841 /*MC
2842    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2843 
2844    Options Database Keys:
2845 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2846 . -mat_block_size <bs> - set the blocksize used to store the matrix
2847 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2848 - -mat_use_hash_table <fact> - set hash table factor
2849 
2850    Level: beginner
2851 
2852    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2855 
2856 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2857 M*/
2858 
2859 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2860 
/*
  MatCreate_MPIBAIJ - Constructor for the MATMPIBAIJ matrix type.

  Allocates and zero-initializes the Mat_MPIBAIJ data structure, installs the MATMPIBAIJ
  function table, creates the insertion stash, registers the conversion/preallocation
  methods that other routines dispatch to by name, and processes the hash-table option.
  Preallocation (and creation of the sequential parts) happens later, in
  MatMPIBAIJSetPreallocation_MPIBAIJ().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register named methods so generic Mat routines can dispatch to this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  /* optional hash-table assembly; a factor <= 1.0 is replaced by the default 1.39 */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2939 
2940 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2941 /*MC
2942    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2943 
2944    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2945    and `MATMPIBAIJ` otherwise.
2946 
2947    Options Database Keys:
2948 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2949 
2950   Level: beginner
2951 
2952 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2953 M*/
2954 
2955 /*@
2956   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2957   (block compressed row).
2958 
2959   Collective
2960 
2961   Input Parameters:
2962 + B     - the matrix
2963 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2964           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2965 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2966            submatrix  (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2971 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2972            submatrix (same for all local rows).
2973 - o_nnz - array containing the number of nonzeros in the various block rows of the
2974            off-diagonal portion of the local submatrix (possibly different for
2975            each block row) or `NULL`.
2976 
2977    If the *_nnz parameter is given then the *_nz parameter is ignored
2978 
2979   Options Database Keys:
2980 + -mat_block_size            - size of the blocks to use
2981 - -mat_use_hash_table <fact> - set hash table factor
2982 
2983   Level: intermediate
2984 
2985   Notes:
2986   For good matrix assembly performance
2987   the user should preallocate the matrix storage by setting the parameters
2988   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2989   performance can be increased by more than a factor of 50.
2990 
  If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2993 
2994   Storage Information:
2995   For a square global matrix we define each processor's diagonal portion
2996   to be its local rows and the corresponding columns (a square submatrix);
2997   each processor's off-diagonal portion encompasses the remainder of the
2998   local matrix (a rectangular submatrix).
2999 
3000   The user can specify preallocated storage for the diagonal part of
3001   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3002   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3003   memory allocation.  Likewise, specify preallocated storage for the
3004   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3005 
3006   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3007   the figure below we depict these three local rows and all columns (0-11).
3008 
3009 .vb
3010            0 1 2 3 4 5 6 7 8 9 10 11
3011           --------------------------
3012    row 3  |o o o d d d o o o o  o  o
3013    row 4  |o o o d d d o o o o  o  o
3014    row 5  |o o o d d d o o o o  o  o
3015           --------------------------
3016 .ve
3017 
3018   Thus, any entries in the d locations are stored in the d (diagonal)
3019   submatrix, and any entries in the o locations are stored in the
3020   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3021   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3022 
3023   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3024   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3025   In general, for PDE problems in which most nonzeros are near the diagonal,
3026   one expects `d_nz` >> `o_nz`.
3027 
3028   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3029   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3030   You can also run with the option `-info` and look for messages with the string
3031   malloc in them to see if additional memory allocation was needed.
3032 
3033 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3034 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the matrix object and ensure bs is the same on all ranks before dispatching */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation (MatMPIBAIJSetPreallocation_MPIBAIJ, composed
     as "MatMPIBAIJSetPreallocation_C"); silently a no-op for matrix types without that method */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3044 
3045 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3046 /*@
3047   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3048   (block compressed row).
3049 
3050   Collective
3051 
3052   Input Parameters:
3053 + comm  - MPI communicator
3054 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3055           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3056 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3057           This value should be the same as the local size used in creating the
3058           y vector for the matrix-vector product y = Ax.
3059 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3060           This value should be the same as the local size used in creating the
3061           x vector for the matrix-vector product y = Ax.
3062 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3063 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3064 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3065           submatrix  (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
3070 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3071           submatrix (same for all local rows).
3072 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3073           off-diagonal portion of the local submatrix (possibly different for
3074           each block row) or NULL.
3075 
3076   Output Parameter:
3077 . A - the matrix
3078 
3079   Options Database Keys:
3080 + -mat_block_size            - size of the blocks to use
3081 - -mat_use_hash_table <fact> - set hash table factor
3082 
3083   Level: intermediate
3084 
3085   Notes:
3086   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3087   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3088   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3089 
3090   For good matrix assembly performance
3091   the user should preallocate the matrix storage by setting the parameters
3092   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3093   performance can be increased by more than a factor of 50.
3094 
3095   If the *_nnz parameter is given then the *_nz parameter is ignored
3096 
  A nonzero block is any block that has 1 or more nonzeros in it
3098 
3099   The user MUST specify either the local or global matrix dimensions
3100   (possibly both).
3101 
  If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3104 
3105   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3106   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3107 
3108   Storage Information:
3109   For a square global matrix we define each processor's diagonal portion
3110   to be its local rows and the corresponding columns (a square submatrix);
3111   each processor's off-diagonal portion encompasses the remainder of the
3112   local matrix (a rectangular submatrix).
3113 
3114   The user can specify preallocated storage for the diagonal part of
3115   the local submatrix with either d_nz or d_nnz (not both).  Set
3116   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3117   memory allocation.  Likewise, specify preallocated storage for the
3118   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3119 
3120   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3121   the figure below we depict these three local rows and all columns (0-11).
3122 
3123 .vb
3124            0 1 2 3 4 5 6 7 8 9 10 11
3125           --------------------------
3126    row 3  |o o o d d d o o o o  o  o
3127    row 4  |o o o d d d o o o o  o  o
3128    row 5  |o o o d d d o o o o  o  o
3129           --------------------------
3130 .ve
3131 
3132   Thus, any entries in the d locations are stored in the d (diagonal)
3133   submatrix, and any entries in the o locations are stored in the
3134   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3135   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3136 
3137   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3138   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3139   In general, for PDE problems in which most nonzeros are near the diagonal,
3140   one expects `d_nz` >> `o_nz`.
3141 
3142 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3143           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3144 @*/
3145 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3146 {
3147   PetscMPIInt size;
3148 
3149   PetscFunctionBegin;
3150   PetscCall(MatCreate(comm, A));
3151   PetscCall(MatSetSizes(*A, m, n, M, N));
3152   PetscCallMPI(MPI_Comm_size(comm, &size));
3153   if (size > 1) {
3154     PetscCall(MatSetType(*A, MATMPIBAIJ));
3155     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3156   } else {
3157     PetscCall(MatSetType(*A, MATSEQBAIJ));
3158     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3159   }
3160   PetscFunctionReturn(PETSC_SUCCESS);
3161 }
3162 
/* Duplicate an MPIBAIJ matrix: create a new matrix of the same type and layout, copy all
   parallel bookkeeping (block dimensions, ownership ranges, colmap, garray, scatter
   context), and duplicate the diagonal (A) and off-diagonal (B) blocks; whether values
   are copied is controlled by cpvalues, which is forwarded to MatDuplicate() */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* share the layout objects by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* original is still in hash-based insertion mode (never preallocated); just set up the copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* copy block-size bookkeeping: bs2 = bs*bs, and dimensions/ownership in block units */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    /* MatGetRow() work arrays and the block scratch buffer are created lazily; start empty */
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: keep the configuration (flag, factor) but not the table/counters */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    /* duplicate the global-to-local column map used for off-diagonal insertion, if present */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* copy garray (one entry per block column of B, per the nbs length used here) */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* propagate composed functions (e.g. the MatConvert_/Preallocation_ methods) to the copy */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3241 
/* Used for both MPIBAIJ and MPISBAIJ matrices: reads a matrix in PETSc binary (scalar CSR)
   format, counts block nonzeros to preallocate, then inserts the values row by row */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nonzero count flags a special on-disk format that this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (for square matrices, mirror the other dimension) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local number of block rows */
  nbs = n / bs; /* local number of block columns */

  /* read in row lengths and build row indices (CSR offsets for the local scalar rows) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix sum: lengths -> offsets */
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      /* reset the per-block-row "block column already counted" trackers */
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is counted */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal portion: count each local block column at most once per block row */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              /* off-diagonal portion: count each global block column at most once per block row */
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* only the call matching the actual matrix type takes effect; the other is a no-op dispatch */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one local scalar row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3343 
3344 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3345 {
3346   PetscBool isbinary;
3347 
3348   PetscFunctionBegin;
3349   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3350   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3351   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3352   PetscFunctionReturn(PETSC_SUCCESS);
3353 }
3354 
3355 /*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3357 
3358   Input Parameters:
3359 + mat  - the matrix
3360 - fact - factor
3361 
3362   Options Database Key:
3363 . -mat_use_hash_table <fact> - provide the factor
3364 
3365   Level: advanced
3366 
3367 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3368 @*/
3369 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3370 {
3371   PetscFunctionBegin;
3372   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3373   PetscFunctionReturn(PETSC_SUCCESS);
3374 }
3375 
3376 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3377 {
3378   Mat_MPIBAIJ *baij;
3379 
3380   PetscFunctionBegin;
3381   baij          = (Mat_MPIBAIJ *)mat->data;
3382   baij->ht_fact = fact;
3383   PetscFunctionReturn(PETSC_SUCCESS);
3384 }
3385 
3386 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3387 {
3388   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3389   PetscBool    flg;
3390 
3391   PetscFunctionBegin;
3392   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3393   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3394   if (Ad) *Ad = a->A;
3395   if (Ao) *Ao = a->B;
3396   if (colmap) *colmap = a->garray;
3397   PetscFunctionReturn(PETSC_SUCCESS);
3398 }
3399 
3400 /*
3401     Special version for direct calls from Fortran (to eliminate two function call overheads
3402 */
3403 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3404   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3405 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3406   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3407 #endif
3408 
3409 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3410 /*@C
3411   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3412 
3413   Collective
3414 
3415   Input Parameters:
3416 + matin  - the matrix
3417 . min    - number of input rows
3418 . im     - input rows
3419 . nin    - number of input columns
3420 . in     - input columns
3421 . v      - numerical values input
3422 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3423 
3424   Level: advanced
3425 
3426   Developer Notes:
3427   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3428 
3429 .seealso: `Mat`, `MatSetValuesBlocked()`
3430 @*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs*bs scratch block used to reorder values before insertion */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of a block within v, minus the block width */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row is owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already contiguous in v */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the (i,j) block from v into the scratch buffer */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal portion: insert into A with local column */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block columns are silently ignored */
          continue;
        } else { /* off-diagonal portion: map global block column to a local column of B */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

            /* colmap entries are 1-based and scaled by bs; verify that invariant in debug builds */
#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not currently in B (and new nonzeros allowed): disassemble so it can be added */
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned by another process: stash it for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3538 
3539 /*@
3540   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3541 
3542   Collective
3543 
3544   Input Parameters:
3545 + comm - MPI communicator
3546 . bs   - the block size, only a block size of 1 is supported
3547 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3548 . n    - This value should be the same as the local size used in creating the
3549          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3550          calculated if `N` is given) For square matrices `n` is almost always `m`.
3551 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3552 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
3554 . j    - column indices
3555 - a    - matrix values
3556 
3557   Output Parameter:
3558 . mat - the matrix
3559 
3560   Level: intermediate
3561 
3562   Notes:
3563   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3564   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3565   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3566 
3567   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3568   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3569   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3570   with column-major ordering within blocks.
3571 
3572   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3573 
3574 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3575           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3576 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* Block CSR row offsets must start at 0; the local row count must be explicit (no PETSC_DECIDE),
     since the CSR arrays describe exactly the local rows */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* Standard creation sequence: sizes and type must be set before the block size and preallocation */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* The input blocks are column-major (Fortran-style values(bs,bs,nnz)); temporarily switch to
     column-oriented insertion so MatMPIBAIJSetPreallocationCSR() copies the values correctly,
     then restore the default row-oriented mode for subsequent MatSetValues() calls by the user */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3592 
3593 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3594 {
3595   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3596   PetscInt    *indx;
3597   PetscScalar *values;
3598 
3599   PetscFunctionBegin;
3600   PetscCall(MatGetSize(inmat, &m, &N));
3601   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3602     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3603     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3604     PetscInt    *bindx, rmax = a->rmax, j;
3605     PetscMPIInt  rank, size;
3606 
3607     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3608     mbs = m / bs;
3609     Nbs = N / cbs;
3610     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3611     nbs = n / cbs;
3612 
3613     PetscCall(PetscMalloc1(rmax, &bindx));
3614     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3615 
3616     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3617     PetscCallMPI(MPI_Comm_rank(comm, &size));
3618     if (rank == size - 1) {
3619       /* Check sum(nbs) = Nbs */
3620       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3621     }
3622 
3623     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3624     for (i = 0; i < mbs; i++) {
3625       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3626       nnz = nnz / bs;
3627       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3628       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3629       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3630     }
3631     PetscCall(PetscFree(bindx));
3632 
3633     PetscCall(MatCreate(comm, outmat));
3634     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3635     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3636     PetscCall(MatSetType(*outmat, MATBAIJ));
3637     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3638     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3639     MatPreallocateEnd(dnz, onz);
3640     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3641   }
3642 
3643   /* numeric phase */
3644   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3645   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3646 
3647   for (i = 0; i < m; i++) {
3648     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3649     Ii = i + rstart;
3650     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3651     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3652   }
3653   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3654   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3655   PetscFunctionReturn(PETSC_SUCCESS);
3656 }
3657