xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision dc97da86830806a16cfaacfab740baa2e54c8135)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
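/*
   Note: mpihashmat.h is a preprocessor template shared with the other MPI matrix
   formats. The TYPE macro defined above selects the concrete data structure the
   generated routines operate on, so the include expands to BAIJ-specific
   definitions of (a sketch; consult the header for the exact list)

      MatSetValues_MPI_Hash()
      MatAssemblyBegin_MPI_Hash()
      MatAssemblyEnd_MPI_Hash()

   which stash entries in a hash map until the first assembly, when the matrix
   can be preallocated from the hashed entries.
*/
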
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *vv;
62   Vec                vB, vA;
63   const PetscScalar *va, *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayRead(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(v, &vv));
92   PetscCall(VecRestoreArrayRead(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part (B) of the local
140    storage of the matrix.  This is done in a non-scalable way, since the
141    length of colmap equals the number of block columns in the global matrix.
142 */
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
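/*
   A minimal (not compiled) sketch of the convention established above: colmap
   stores i*bs + 1, shifted by one so that a stored value of zero can mean "this
   global block column has no entries in B".  A caller recovers the local scalar
   column for a global scalar column gcol as

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIGetWithDefault(baij->colmap, gcol / bs + 1, 0, &lcol));
      lcol--;
   #else
      lcol = baij->colmap[gcol / bs] - 1;
   #endif
      if (lcol >= 0) lcol += gcol % bs;    <-- lcol < 0 signals "not present in B"

   which is exactly the pattern used by MatSetValues_MPIBAIJ() and
   MatGetValues_MPIBAIJ() below.
*/
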
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161   do { \
162     brow = row / bs; \
163     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
164     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165     rmax = aimax[brow]; \
166     nrow = ailen[brow]; \
167     bcol = col / bs; \
168     ridx = row % bs; \
169     cidx = col % bs; \
170     low  = 0; \
171     high = nrow; \
172     while (high - low > 3) { \
173       t = (low + high) / 2; \
174       if (rp[t] > bcol) high = t; \
175       else low = t; \
176     } \
177     for (_i = low; _i < high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2 * _i + bs * cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value; \
182         else *bap = value; \
183         goto a_noinsert; \
184       } \
185     } \
186     if (a->nonew == 1) goto a_noinsert; \
187     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189     N = nrow++ - 1; \
190     /* shift up all the later entries in this row */ \
191     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194     rp[_i]                          = bcol; \
195     ap[bs2 * _i + bs * cidx + ridx] = value; \
196   a_noinsert:; \
197     ailen[brow] = nrow; \
198   } while (0)
199 
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201   do { \
202     brow = row / bs; \
203     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
204     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205     rmax = bimax[brow]; \
206     nrow = bilen[brow]; \
207     bcol = col / bs; \
208     ridx = row % bs; \
209     cidx = col % bs; \
210     low  = 0; \
211     high = nrow; \
212     while (high - low > 3) { \
213       t = (low + high) / 2; \
214       if (rp[t] > bcol) high = t; \
215       else low = t; \
216     } \
217     for (_i = low; _i < high; _i++) { \
218       if (rp[_i] > bcol) break; \
219       if (rp[_i] == bcol) { \
220         bap = ap + bs2 * _i + bs * cidx + ridx; \
221         if (addv == ADD_VALUES) *bap += value; \
222         else *bap = value; \
223         goto b_noinsert; \
224       } \
225     } \
226     if (b->nonew == 1) goto b_noinsert; \
227     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229     N = nrow++ - 1; \
230     /* shift up all the later entries in this row */ \
231     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234     rp[_i]                          = bcol; \
235     ap[bs2 * _i + bs * cidx + ridx] = value; \
236   b_noinsert:; \
237     bilen[brow] = nrow; \
238   } while (0)
239 
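/*
   Both macros above locate the target block with the same hybrid search: a
   binary search narrows the sorted block-column list rp[0..nrow) until at most
   three candidates remain,

      low = 0; high = nrow;
      while (high - low > 3) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }

   and a short linear scan then either finds an existing block (rp[_i] == bcol,
   updated in place) or the insertion slot (the first _i with rp[_i] > bcol).
   Within a bs-by-bs block stored column-major, the scalar entry (ridx, cidx)
   lives at offset bs*cidx + ridx, hence the address ap + bs2*_i + bs*cidx + ridx.
*/
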
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243   MatScalar    value;
244   PetscBool    roworiented = baij->roworiented;
245   PetscInt     i, j, row, col;
246   PetscInt     rstart_orig = mat->rmap->rstart;
247   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249 
250   /* Some variables required by the macros */
251   Mat          A     = baij->A;
252   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
253   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254   MatScalar   *aa = a->a;
255 
256   Mat          B     = baij->B;
257   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
258   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259   MatScalar   *ba = b->a;
260 
261   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
262   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
263   MatScalar *ap, *bap;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < m; i++) {
267     if (im[i] < 0) continue;
268     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269     if (im[i] >= rstart_orig && im[i] < rend_orig) {
270       row = im[i] - rstart_orig;
271       for (j = 0; j < n; j++) {
272         if (in[j] >= cstart_orig && in[j] < cend_orig) {
273           col = in[j] - cstart_orig;
274           if (roworiented) value = v[i * n + j];
275           else value = v[i + j * m];
276           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277         } else if (in[j] < 0) {
278           continue;
279         } else {
280           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281           if (mat->was_assembled) {
282             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285             col = col - 1;
286 #else
287             col = baij->colmap[in[j] / bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290               PetscCall(MatDisAssemble_MPIBAIJ(mat));
291               col = in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B     = baij->B;
294               b     = (Mat_SeqBAIJ *)B->data;
295               bimax = b->imax;
296               bi    = b->i;
297               bilen = b->ilen;
298               bj    = b->j;
299               ba    = b->a;
300             } else {
301               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302               col += in[j] % bs;
303             }
304           } else col = in[j];
305           if (roworiented) value = v[i * n + j];
306           else value = v[i + j * m];
307           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309         }
310       }
311     } else {
312       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
313       if (!baij->donotstash) {
314         mat->assembled = PETSC_FALSE;
315         if (roworiented) {
316           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317         } else {
318           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319         }
320       }
321     }
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404     This routine should be optimized by passing additional stride information into
405     MatSetValuesBlocked_SeqBAIJ_Inlined(), so that the block copy at ** Here a copy is required ** below is not needed
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
410   const PetscScalar *value;
411   MatScalar         *barray      = baij->barray;
412   PetscBool          roworiented = baij->roworiented;
413   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
414   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416 
417   PetscFunctionBegin;
418   if (!barray) {
419     PetscCall(PetscMalloc1(bs2, &barray));
420     baij->barray = barray;
421   }
422 
423   if (roworiented) stepval = (n - 1) * bs;
424   else stepval = (m - 1) * bs;
425 
426   for (i = 0; i < m; i++) {
427     if (im[i] < 0) continue;
428     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429     if (im[i] >= rstart && im[i] < rend) {
430       row = im[i] - rstart;
431       for (j = 0; j < n; j++) {
432         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
433         if ((roworiented) && (n == 1)) {
434           barray = (MatScalar *)v + i * bs2;
435         } else if ((!roworiented) && (m == 1)) {
436           barray = (MatScalar *)v + j * bs2;
437         } else { /* Here a copy is required */
438           if (roworiented) {
439             value = v + (i * (stepval + bs) + j) * bs;
440           } else {
441             value = v + (j * (stepval + bs) + i) * bs;
442           }
443           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445             barray += bs;
446           }
447           barray -= bs2;
448         }
449 
450         if (in[j] >= cstart && in[j] < cend) {
451           col = in[j] - cstart;
452           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453         } else if (in[j] < 0) {
454           continue;
455         } else {
456           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457           if (mat->was_assembled) {
458             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459 
460 #if defined(PETSC_USE_CTABLE)
461             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
462             col = col < 1 ? -1 : (col - 1) / bs;
463 #else
464             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
465 #endif
466             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
467               PetscCall(MatDisAssemble_MPIBAIJ(mat));
468               col = in[j];
469             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
470           } else col = in[j];
471           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
472         }
473       }
474     } else {
475       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
476       if (!baij->donotstash) {
477         if (roworiented) {
478           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
479         } else {
480           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
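/*
   A worked example of the stride arithmetic above, for the copy path: with
   row-oriented input, bs = 2, m = 1 block row, and n = 2 block columns, v[] is a
   2 x 4 row-major scalar array, stepval = (n - 1)*bs = 2, and block j starts at
   v + (i*(stepval + bs) + j)*bs = v + 2*j.  Successive rows of a block sit
   bs + stepval = 4 scalars apart, so the copy gathers

      block 0 = { v[0], v[1],         block 1 = { v[2], v[3],
                  v[4], v[5] }                    v[6], v[7] }

   (Illustrative only; note the code skips the copy entirely in the
   single-block-column and single-block-row cases.)
*/
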
488 #define HASH_KEY             0.6180339887
489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
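
/*
   HASH implements multiplicative hashing with the golden-ratio constant:
   h1 = floor(size * frac(key * 0.6180339887)).  A quick worked example
   (illustrative): with size = 100 and key = 7, key*HASH_KEY = 4.3262..., the
   fractional part is 0.3262..., so h1 = 32.  Collisions are resolved by linear
   probing: the loops below scan forward from h1 and wrap around once at the
   end of the table.
*/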
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
495   PetscBool    roworiented = baij->roworiented;
496   PetscInt     i, j, row, col;
497   PetscInt     rstart_orig = mat->rmap->rstart;
498   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500   PetscReal    tmp;
501   MatScalar  **HD       = baij->hd, value;
502   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503 
504   PetscFunctionBegin;
505   for (i = 0; i < m; i++) {
506     if (PetscDefined(USE_DEBUG)) {
507       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509     }
510     row = im[i];
511     if (row >= rstart_orig && row < rend_orig) {
512       for (j = 0; j < n; j++) {
513         col = in[j];
514         if (roworiented) value = v[i * n + j];
515         else value = v[i + j * m];
516         /* Look up in the hash table */
517         key = (row / bs) * Nbs + (col / bs) + 1;
518         h1  = HASH(size, key, tmp);
519 
520         idx = h1;
521         if (PetscDefined(USE_DEBUG)) {
522           insert_ct++;
523           total_ct++;
524           if (HT[idx] != key) {
525             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526             if (idx == size) {
527               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529             }
530           }
531         } else if (HT[idx] != key) {
532           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533           if (idx == size) {
534             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up in the hash table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
653 {
654   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
655   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
656   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
657 
658   PetscFunctionBegin;
659   for (i = 0; i < m; i++) {
660     if (idxm[i] < 0) continue; /* negative row */
661     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
662     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
663       row = idxm[i] - bsrstart;
664       for (j = 0; j < n; j++) {
665         if (idxn[j] < 0) continue; /* negative column */
666         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
667         if (idxn[j] >= bscstart && idxn[j] < bscend) {
668           col = idxn[j] - bscstart;
669           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
670         } else {
671           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
672 #if defined(PETSC_USE_CTABLE)
673           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
674           data--;
675 #else
676           data = baij->colmap[idxn[j] / bs] - 1;
677 #endif
678           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
679           else {
680             col = data + idxn[j] % bs;
681             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
682           }
683         }
684       }
685     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
686   }
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
691 {
692   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
693   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
694   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
695   PetscReal    sum = 0.0;
696   MatScalar   *v;
697 
698   PetscFunctionBegin;
699   if (baij->size == 1) {
700     PetscCall(MatNorm(baij->A, type, nrm));
701   } else {
702     if (type == NORM_FROBENIUS) {
703       v  = amat->a;
704       nz = amat->nz * bs2;
705       for (i = 0; i < nz; i++) {
706         sum += PetscRealPart(PetscConj(*v) * (*v));
707         v++;
708       }
709       v  = bmat->a;
710       nz = bmat->nz * bs2;
711       for (i = 0; i < nz; i++) {
712         sum += PetscRealPart(PetscConj(*v) * (*v));
713         v++;
714       }
715       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
716       *nrm = PetscSqrtReal(*nrm);
717     } else if (type == NORM_1) { /* max column sum */
718       PetscReal  *tmp, *tmp2;
719       PetscInt   *jj, *garray = baij->garray, cstart = baij->rstartbs;
720       PetscMPIInt iN;
721 
722       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           col = bs * (cstart + *jj) + j; /* column index */
729           for (row = 0; row < bs; row++) {
730             tmp[col] += PetscAbsScalar(*v);
731             v++;
732           }
733         }
734         jj++;
735       }
736       v  = bmat->a;
737       jj = bmat->j;
738       for (i = 0; i < bmat->nz; i++) {
739         for (j = 0; j < bs; j++) {
740           col = bs * garray[*jj] + j;
741           for (row = 0; row < bs; row++) {
742             tmp[col] += PetscAbsScalar(*v);
743             v++;
744           }
745         }
746         jj++;
747       }
748       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
749       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
750       *nrm = 0.0;
751       for (j = 0; j < mat->cmap->N; j++) {
752         if (tmp2[j] > *nrm) *nrm = tmp2[j];
753       }
754       PetscCall(PetscFree(tmp));
755       PetscCall(PetscFree(tmp2));
756     } else if (type == NORM_INFINITY) { /* max row sum */
757       PetscReal *sums;
758       PetscCall(PetscMalloc1(bs, &sums));
759       sum = 0.0;
760       for (j = 0; j < amat->mbs; j++) {
761         for (row = 0; row < bs; row++) sums[row] = 0.0;
762         v  = amat->a + bs2 * amat->i[j];
763         nz = amat->i[j + 1] - amat->i[j];
764         for (i = 0; i < nz; i++) {
765           for (col = 0; col < bs; col++) {
766             for (row = 0; row < bs; row++) {
767               sums[row] += PetscAbsScalar(*v);
768               v++;
769             }
770           }
771         }
772         v  = bmat->a + bs2 * bmat->i[j];
773         nz = bmat->i[j + 1] - bmat->i[j];
774         for (i = 0; i < nz; i++) {
775           for (col = 0; col < bs; col++) {
776             for (row = 0; row < bs; row++) {
777               sums[row] += PetscAbsScalar(*v);
778               v++;
779             }
780           }
781         }
782         for (row = 0; row < bs; row++) {
783           if (sums[row] > sum) sum = sums[row];
784         }
785       }
786       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
787       PetscCall(PetscFree(sums));
788     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
789   }
790   PetscFunctionReturn(PETSC_SUCCESS);
791 }
792 
793 /*
794   Creates and populates the hash table.
795   This table is created only once.
796   If new entries need to be added to the matrix,
797   then the hash table has to be destroyed and
798   recreated.
799 */
800 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
803   Mat          A = baij->A, B = baij->B;
804   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
805   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
806   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
807   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
808   PetscInt    *HT, key;
809   MatScalar  **HD;
810   PetscReal    tmp;
811 #if defined(PETSC_USE_INFO)
812   PetscInt ct = 0, max = 0;
813 #endif
814 
815   PetscFunctionBegin;
816   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
817 
818   baij->ht_size = (PetscInt)(factor * nz);
819   ht_size       = baij->ht_size;
820 
821   /* Allocate Memory for Hash Table */
822   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
823   HD = baij->hd;
824   HT = baij->ht;
825 
826   /* Loop Over A */
827   for (i = 0; i < a->mbs; i++) {
828     for (j = ai[i]; j < ai[i + 1]; j++) {
829       row = i + rstart;
830       col = aj[j] + cstart;
831 
832       key = row * Nbs + col + 1;
833       h1  = HASH(ht_size, key, tmp);
834       for (k = 0; k < ht_size; k++) {
835         if (!HT[(h1 + k) % ht_size]) {
836           HT[(h1 + k) % ht_size] = key;
837           HD[(h1 + k) % ht_size] = a->a + j * bs2;
838           break;
839 #if defined(PETSC_USE_INFO)
840         } else {
841           ct++;
842 #endif
843         }
844       }
845 #if defined(PETSC_USE_INFO)
846       if (k > max) max = k;
847 #endif
848     }
849   }
850   /* Loop Over B */
851   for (i = 0; i < b->mbs; i++) {
852     for (j = bi[i]; j < bi[i + 1]; j++) {
853       row = i + rstart;
854       col = garray[bj[j]];
855       key = row * Nbs + col + 1;
856       h1  = HASH(ht_size, key, tmp);
857       for (k = 0; k < ht_size; k++) {
858         if (!HT[(h1 + k) % ht_size]) {
859           HT[(h1 + k) % ht_size] = key;
860           HD[(h1 + k) % ht_size] = b->a + j * bs2;
861           break;
862 #if defined(PETSC_USE_INFO)
863         } else {
864           ct++;
865 #endif
866         }
867       }
868 #if defined(PETSC_USE_INFO)
869       if (k > max) max = k;
870 #endif
871     }
872   }
873 
874   /* Print Summary */
875 #if defined(PETSC_USE_INFO)
876   for (i = 0, j = 0; i < ht_size; i++) {
877     if (HT[i]) j++;
878   }
879   PetscCall(PetscInfo(mat, "Average search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
880 #endif
881   PetscFunctionReturn(PETSC_SUCCESS);
882 }
883 
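/*
   The hash-table fast path is opt-in.  A minimal usage sketch (see the
   MatSetOption() and MatMPIBAIJSetHashTableFactor() documentation for the
   authoritative interface):

      PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
      PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));    table holds 1.6 * (number of nonzero blocks) slots

   Once the first MAT_FINAL_ASSEMBLY completes, MatAssemblyEnd_MPIBAIJ() below
   swaps mat->ops->setvalues and mat->ops->setvaluesblocked to the _HT variants,
   so subsequent updates of existing nonzeros go through the table instead of a
   per-row search.
*/
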
884 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
885 {
886   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
887   PetscInt     nstash, reallocs;
888 
889   PetscFunctionBegin;
890   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
891 
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
893   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
894   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
895   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
896   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
897   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
902 {
903   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
904   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
905   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
906   PetscInt    *row, *col;
907   PetscBool    r1, r2, r3, other_disassembled;
908   MatScalar   *val;
909   PetscMPIInt  n;
910 
911   PetscFunctionBegin;
912   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
913   if (!baij->donotstash && !mat->nooffprocentries) {
914     while (1) {
915       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
916       if (!flg) break;
917 
918       for (i = 0; i < n;) {
919         /* Now identify the consecutive vals belonging to the same row */
920         for (j = i, rstart = row[j]; j < n; j++) {
921           if (row[j] != rstart) break;
922         }
923         if (j < n) ncols = j - i;
924         else ncols = n - i;
925         /* Now assemble all these values with a single function call */
926         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
927         i = j;
928       }
929     }
930     PetscCall(MatStashScatterEnd_Private(&mat->stash));
931     /* Now process the block-stash. Since the values are stashed column-oriented,
932        temporarily clear the row-oriented flags, and after MatSetValues()
933        restore the original flags */
934     r1 = baij->roworiented;
935     r2 = a->roworiented;
936     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
937 
938     baij->roworiented                           = PETSC_FALSE;
939     a->roworiented                              = PETSC_FALSE;
940     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
941     while (1) {
942       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
943       if (!flg) break;
944 
945       for (i = 0; i < n;) {
946         /* Now identify the consecutive vals belonging to the same row */
947         for (j = i, rstart = row[j]; j < n; j++) {
948           if (row[j] != rstart) break;
949         }
950         if (j < n) ncols = j - i;
951         else ncols = n - i;
952         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
953         i = j;
954       }
955     }
956     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
957 
958     baij->roworiented                           = r1;
959     a->roworiented                              = r2;
960     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
961   }
962 
963   PetscCall(MatAssemblyBegin(baij->A, mode));
964   PetscCall(MatAssemblyEnd(baij->A, mode));
965 
966   /* Determine whether any process has disassembled; if so, we must
967      also disassemble ourselves, in order that we may reassemble. */
968   /*
969      If the nonzero structure of the submatrix B cannot change, then we know that
970      no process disassembled, and thus we can skip this step
971   */
972   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
973     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
974     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
975   }
976 
977   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
978   PetscCall(MatAssemblyBegin(baij->B, mode));
979   PetscCall(MatAssemblyEnd(baij->B, mode));
980 
981 #if defined(PETSC_USE_INFO)
982   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
983     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
984 
985     baij->ht_total_ct  = 0;
986     baij->ht_insert_ct = 0;
987   }
988 #endif
989   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
990     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
991 
992     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
993     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
994   }
995 
996   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
997 
998   baij->rowvalues = NULL;
999 
1000   /* if no new nonzero locations are allowed in the matrix, then the matrix nonzero state only needs to be set the first time through */
1001   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
1002     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1003     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1004   }
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1009 #include <petscdraw.h>
1010 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1011 {
1012   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1013   PetscMPIInt       rank = baij->rank;
1014   PetscInt          bs   = mat->rmap->bs;
1015   PetscBool         iascii, isdraw;
1016   PetscViewer       sviewer;
1017   PetscViewerFormat format;
1018 
1019   PetscFunctionBegin;
1020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1021   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1022   if (iascii) {
1023     PetscCall(PetscViewerGetFormat(viewer, &format));
1024     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1025       MatInfo info;
1026       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1027       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1028       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1029       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1030                                                    mat->rmap->bs, (double)info.memory));
1031       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1032       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1033       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1034       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1035       PetscCall(PetscViewerFlush(viewer));
1036       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1037       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1038       PetscCall(VecScatterView(baij->Mvctx, viewer));
1039       PetscFunctionReturn(PETSC_SUCCESS);
1040     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1041       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1042       PetscFunctionReturn(PETSC_SUCCESS);
1043     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1044       PetscFunctionReturn(PETSC_SUCCESS);
1045     }
1046   }
1047 
1048   if (isdraw) {
1049     PetscDraw draw;
1050     PetscBool isnull;
1051     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1052     PetscCall(PetscDrawIsNull(draw, &isnull));
1053     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1054   }
1055 
1056   {
1057     /* assemble the entire matrix onto the first process */
1058     Mat          A;
1059     Mat_SeqBAIJ *Aloc;
1060     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1061     MatScalar   *a;
1062     const char  *matname;
1063 
1064     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1065     /* Perhaps this should be the type of mat? */
1066     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1067     if (rank == 0) {
1068       PetscCall(MatSetSizes(A, M, N, M, N));
1069     } else {
1070       PetscCall(MatSetSizes(A, 0, 0, M, N));
1071     }
1072     PetscCall(MatSetType(A, MATMPIBAIJ));
1073     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1074     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1075 
1076     /* copy over the A part */
1077     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1078     ai   = Aloc->i;
1079     aj   = Aloc->j;
1080     a    = Aloc->a;
1081     PetscCall(PetscMalloc1(bs, &rvals));
1082 
1083     for (i = 0; i < mbs; i++) {
1084       rvals[0] = bs * (baij->rstartbs + i);
1085       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1086       for (j = ai[i]; j < ai[i + 1]; j++) {
1087         col = (baij->cstartbs + aj[j]) * bs;
1088         for (k = 0; k < bs; k++) {
1089           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1090           col++;
1091           a += bs;
1092         }
1093       }
1094     }
1095     /* copy over the B part */
1096     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1097     ai   = Aloc->i;
1098     aj   = Aloc->j;
1099     a    = Aloc->a;
1100     for (i = 0; i < mbs; i++) {
1101       rvals[0] = bs * (baij->rstartbs + i);
1102       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1103       for (j = ai[i]; j < ai[i + 1]; j++) {
1104         col = baij->garray[aj[j]] * bs;
1105         for (k = 0; k < bs; k++) {
1106           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1107           col++;
1108           a += bs;
1109         }
1110       }
1111     }
1112     PetscCall(PetscFree(rvals));
1113     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1114     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1115     /*
1116        Everyone has to participate in the call, since the graphics waits are
1117        synchronized across all processes that share the PetscDraw object
1118     */
1119     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1120     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1121     if (rank == 0) {
1122       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1123       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1124     }
1125     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1126     PetscCall(MatDestroy(&A));
1127   }
1128   PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130 
1131 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1132 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1133 {
1134   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1135   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1136   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1137   const PetscInt *garray = aij->garray;
1138   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1139   PetscCount      nz, hnz;
1140   PetscInt       *rowlens, *colidxs;
1141   PetscScalar    *matvals;
1142   PetscMPIInt     rank;
1143 
1144   PetscFunctionBegin;
1145   PetscCall(PetscViewerSetUp(viewer));
1146 
1147   M  = mat->rmap->N;
1148   N  = mat->cmap->N;
1149   m  = mat->rmap->n;
1150   rs = mat->rmap->rstart;
1151   cs = mat->cmap->rstart;
1152   bs = mat->rmap->bs;
1153   nz = bs * bs * (A->nz + B->nz);
1154 
1155   /* write matrix header */
1156   header[0] = MAT_FILE_CLASSID;
1157   header[1] = M;
1158   header[2] = N;
1159   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1160   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1161   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1162   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1163 
1164   /* fill in and store row lengths */
1165   PetscCall(PetscMalloc1(m, &rowlens));
1166   for (cnt = 0, i = 0; i < A->mbs; i++)
1167     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1168   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1169   PetscCall(PetscFree(rowlens));
1170 
1171   /* fill in and store column indices */
1172   PetscCall(PetscMalloc1(nz, &colidxs));
1173   for (cnt = 0, i = 0; i < A->mbs; i++) {
1174     for (k = 0; k < bs; k++) {
1175       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1176         if (garray[B->j[jb]] > cs / bs) break;
1177         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1178       }
1179       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1180         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1181       for (; jb < B->i[i + 1]; jb++)
1182         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1183     }
1184   }
1185   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
1186   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1187   PetscCall(PetscFree(colidxs));
1188 
1189   /* fill in and store nonzero values */
1190   PetscCall(PetscMalloc1(nz, &matvals));
1191   for (cnt = 0, i = 0; i < A->mbs; i++) {
1192     for (k = 0; k < bs; k++) {
1193       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1194         if (garray[B->j[jb]] > cs / bs) break;
1195         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1196       }
1197       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1198         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1199       for (; jb < B->i[i + 1]; jb++)
1200         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1201     }
1202   }
1203   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1204   PetscCall(PetscFree(matvals));
1205 
1206   /* write block size option to the viewer's .info file */
1207   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1208   PetscFunctionReturn(PETSC_SUCCESS);
1209 }
1210 
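/*
   For reference, the binary layout written by MatView_MPIBAIJ_Binary() above
   (integers are PetscInt, values PetscScalar, as the code stores them):

      header[4]  = { MAT_FILE_CLASSID, M, N, total number of nonzeros }
      rowlens[M] = number of nonzeros in each global scalar row
      colidxs[]  = global column indices, each row sorted by increasing column
      matvals[]  = nonzero values, in the same order as colidxs[]

   The jb/ja interleaving in the two loops emits, for each scalar row, the
   off-diagonal (B) columns that lie before this process's diagonal block, then
   the diagonal (A) columns, then the remaining B columns, so the indices come
   out globally sorted within each row.
*/
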
1211 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1212 {
1213   PetscBool iascii, isdraw, issocket, isbinary;
1214 
1215   PetscFunctionBegin;
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1220   if (iascii || isdraw || issocket) {
1221     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1222   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1227 {
1228   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1229   PetscInt     nt;
1230 
1231   PetscFunctionBegin;
1232   PetscCall(VecGetLocalSize(xx, &nt));
1233   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1234   PetscCall(VecGetLocalSize(yy, &nt));
1235   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1236   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1237   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1238   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1239   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1240   PetscFunctionReturn(PETSC_SUCCESS);
1241 }
1242 
1243 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1244 {
1245   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1246 
1247   PetscFunctionBegin;
1248   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1249   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1250   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1251   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1256 {
1257   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1258 
1259   PetscFunctionBegin;
1260   /* do nondiagonal part */
1261   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1262   /* do local part */
1263   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1264   /* add partial results together */
1265   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1267   PetscFunctionReturn(PETSC_SUCCESS);
1268 }
1269 
1270 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1271 {
1272   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1273 
1274   PetscFunctionBegin;
1275   /* do nondiagonal part */
1276   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1277   /* do local part */
1278   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1279   /* add partial results together */
1280   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 
1285 /*
1286   This only works correctly for square matrices where the subblock A->A is the
1287    diagonal block
1288 */
1289 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1290 {
1291   PetscFunctionBegin;
1292   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1293   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 
1297 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1298 {
1299   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1300 
1301   PetscFunctionBegin;
1302   PetscCall(MatScale(a->A, aa));
1303   PetscCall(MatScale(a->B, aa));
1304   PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306 
1307 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1308 {
1309   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1310   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1311   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1312   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1313   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1314 
1315   PetscFunctionBegin;
1316   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1317   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1318   mat->getrowactive = PETSC_TRUE;
1319 
1320   if (!mat->rowvalues && (idx || v)) {
1321     /*
1322         allocate enough space to hold information from the longest row.
1323     */
1324     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1325     PetscInt     max = 1, mbs = mat->mbs, tmp;
1326     for (i = 0; i < mbs; i++) {
1327       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1328       if (max < tmp) max = tmp;
1329     }
1330     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1331   }
1332   lrow = row - brstart;
1333 
1334   pvA = &vworkA;
1335   pcA = &cworkA;
1336   pvB = &vworkB;
1337   pcB = &cworkB;
1338   if (!v) {
1339     pvA = NULL;
1340     pvB = NULL;
1341   }
1342   if (!idx) {
1343     pcA = NULL;
1344     if (!v) pcB = NULL;
1345   }
1346   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1347   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1348   nztot = nzA + nzB;
1349 
1350   cmap = mat->garray;
1351   if (v || idx) {
1352     if (nztot) {
1353       /* Sort by increasing column numbers, assuming A and B already sorted */
1354       PetscInt imark = -1;
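           /* imark is the split point of the off-diagonal row: entries 0..imark-1 have global
              block column < cstart and are placed before the nzA diagonal-block entries, the
              remaining off-diagonal entries are placed after them */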
1355       if (v) {
1356         *v = v_p = mat->rowvalues;
1357         for (i = 0; i < nzB; i++) {
1358           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1359           else break;
1360         }
1361         imark = i;
1362         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1363         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1364       }
1365       if (idx) {
1366         *idx = idx_p = mat->rowindices;
1367         if (imark > -1) {
1368           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1369         } else {
1370           for (i = 0; i < nzB; i++) {
1371             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1372             else break;
1373           }
1374           imark = i;
1375         }
1376         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1377         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1378       }
1379     } else {
1380       if (idx) *idx = NULL;
1381       if (v) *v = NULL;
1382     }
1383   }
1384   *nz = nztot;
1385   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1386   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1387   PetscFunctionReturn(PETSC_SUCCESS);
1388 }
1389 
1390 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1391 {
1392   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1393 
1394   PetscFunctionBegin;
1395   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1396   baij->getrowactive = PETSC_FALSE;
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1401 {
1402   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(MatZeroEntries(l->A));
1406   PetscCall(MatZeroEntries(l->B));
1407   PetscFunctionReturn(PETSC_SUCCESS);
1408 }
1409 
1410 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1411 {
1412   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1413   Mat            A = a->A, B = a->B;
1414   PetscLogDouble isend[5], irecv[5];
1415 
1416   PetscFunctionBegin;
1417   info->block_size = (PetscReal)matin->rmap->bs;
1418 
1419   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1420 
1421   isend[0] = info->nz_used;
1422   isend[1] = info->nz_allocated;
1423   isend[2] = info->nz_unneeded;
1424   isend[3] = info->memory;
1425   isend[4] = info->mallocs;
1426 
1427   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1428 
1429   isend[0] += info->nz_used;
1430   isend[1] += info->nz_allocated;
1431   isend[2] += info->nz_unneeded;
1432   isend[3] += info->memory;
1433   isend[4] += info->mallocs;
1434 
1435   if (flag == MAT_LOCAL) {
1436     info->nz_used      = isend[0];
1437     info->nz_allocated = isend[1];
1438     info->nz_unneeded  = isend[2];
1439     info->memory       = isend[3];
1440     info->mallocs      = isend[4];
1441   } else if (flag == MAT_GLOBAL_MAX) {
1442     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1443 
1444     info->nz_used      = irecv[0];
1445     info->nz_allocated = irecv[1];
1446     info->nz_unneeded  = irecv[2];
1447     info->memory       = irecv[3];
1448     info->mallocs      = irecv[4];
1449   } else if (flag == MAT_GLOBAL_SUM) {
1450     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1451 
1452     info->nz_used      = irecv[0];
1453     info->nz_allocated = irecv[1];
1454     info->nz_unneeded  = irecv[2];
1455     info->memory       = irecv[3];
1456     info->mallocs      = irecv[4];
1457   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1458   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1459   info->fill_ratio_needed = 0;
1460   info->factor_mallocs    = 0;
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
1464 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1465 {
1466   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1467 
1468   PetscFunctionBegin;
1469   switch (op) {
1470   case MAT_NEW_NONZERO_LOCATIONS:
1471   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473   case MAT_KEEP_NONZERO_PATTERN:
1474   case MAT_NEW_NONZERO_LOCATION_ERR:
1475     MatCheckPreallocated(A, 1);
1476     PetscCall(MatSetOption(a->A, op, flg));
1477     PetscCall(MatSetOption(a->B, op, flg));
1478     break;
1479   case MAT_ROW_ORIENTED:
1480     MatCheckPreallocated(A, 1);
1481     a->roworiented = flg;
1482 
1483     PetscCall(MatSetOption(a->A, op, flg));
1484     PetscCall(MatSetOption(a->B, op, flg));
1485     break;
1486   case MAT_FORCE_DIAGONAL_ENTRIES:
1487   case MAT_SORTED_FULL:
1488   case MAT_SUBMAT_SINGLEIS:
1489     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1490     break;
1491   case MAT_IGNORE_OFF_PROC_ENTRIES:
1492     a->donotstash = flg;
1493     break;
1494   case MAT_USE_HASH_TABLE:
1495     a->ht_flag = flg;
1496     a->ht_fact = 1.39;
1497     break;
1498   case MAT_SYMMETRIC:
1499   case MAT_STRUCTURALLY_SYMMETRIC:
1500   case MAT_HERMITIAN:
1501   case MAT_SYMMETRY_ETERNAL:
1502   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1503   case MAT_SPD_ETERNAL:
1504     /* if the diagonal block is square it inherits some of the properties above */
1505     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1506     break;
1507   default:
1508     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1509   }
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 }
1512 
1513 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1514 {
1515   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1516   Mat_SeqBAIJ *Aloc;
1517   Mat          B;
1518   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1519   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1520   MatScalar   *a;
1521 
1522   PetscFunctionBegin;
1523   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1524   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1525     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1526     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1527     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1528     /* Do not know preallocation information, but must set block size */
1529     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1530   } else {
1531     B = *matout;
1532   }
1533 
1534   /* copy over the A part */
1535   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1536   ai   = Aloc->i;
1537   aj   = Aloc->j;
1538   a    = Aloc->a;
1539   PetscCall(PetscMalloc1(bs, &rvals));
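       /* rvals lists the bs global row indices of block row i of A; blocks are stored
          column-major, so advancing a by bs selects one block column at a time, and each
          block column of A is inserted as a length-bs segment of the matching row of B */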
1540 
1541   for (i = 0; i < mbs; i++) {
1542     rvals[0] = bs * (baij->rstartbs + i);
1543     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1544     for (j = ai[i]; j < ai[i + 1]; j++) {
1545       col = (baij->cstartbs + aj[j]) * bs;
1546       for (k = 0; k < bs; k++) {
1547         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1548 
1549         col++;
1550         a += bs;
1551       }
1552     }
1553   }
1554   /* copy over the B part */
1555   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1556   ai   = Aloc->i;
1557   aj   = Aloc->j;
1558   a    = Aloc->a;
1559   for (i = 0; i < mbs; i++) {
1560     rvals[0] = bs * (baij->rstartbs + i);
1561     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1562     for (j = ai[i]; j < ai[i + 1]; j++) {
1563       col = baij->garray[aj[j]] * bs;
1564       for (k = 0; k < bs; k++) {
1565         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1566         col++;
1567         a += bs;
1568       }
1569     }
1570   }
1571   PetscCall(PetscFree(rvals));
1572   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1574 
1575   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1576   else PetscCall(MatHeaderMerge(A, &B));
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1581 {
1582   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1583   Mat          a = baij->A, b = baij->B;
1584   PetscInt     s1, s2, s3;
1585 
1586   PetscFunctionBegin;
1587   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1588   if (rr) {
1589     PetscCall(VecGetLocalSize(rr, &s1));
1590     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1591     /* Overlap communication with computation. */
1592     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1593   }
1594   if (ll) {
1595     PetscCall(VecGetLocalSize(ll, &s1));
1596     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1597     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1598   }
1599   /* scale the diagonal block */
1600   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1601 
1602   if (rr) {
1603     /* Do a scatter end and then right scale the off-diagonal block */
1604     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1605     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1606   }
1607   PetscFunctionReturn(PETSC_SUCCESS);
1608 }
1609 
1610 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1611 {
1612   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1613   PetscInt    *lrows;
1614   PetscInt     r, len;
1615   PetscBool    cong;
1616 
1617   PetscFunctionBegin;
1618   /* get locally owned rows */
1619   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1620   /* fix right-hand side if needed */
1621   if (x && b) {
1622     const PetscScalar *xx;
1623     PetscScalar       *bb;
1624 
1625     PetscCall(VecGetArrayRead(x, &xx));
1626     PetscCall(VecGetArray(b, &bb));
1627     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1628     PetscCall(VecRestoreArrayRead(x, &xx));
1629     PetscCall(VecRestoreArray(b, &bb));
1630   }
1631 
1632   /* actually zap the local rows */
1633   /*
1634         Zero the required rows. If the "diagonal block" of the matrix
1635      is square and the user wishes to set the diagonal, we use separate
1636      code so that MatSetValues() is not called for each diagonal entry,
1637      which would allocate new memory on each call, triggering many
1638      mallocs and slowing things down.
1639   */
1640   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1641   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1642   PetscCall(MatHasCongruentLayouts(A, &cong));
1643   if ((diag != 0.0) && cong) {
1644     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1645   } else if (diag != 0.0) {
1646     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1647     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1648     for (r = 0; r < len; ++r) {
1649       const PetscInt row = lrows[r] + A->rmap->rstart;
1650       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1651     }
1652     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1653     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1654   } else {
1655     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1656   }
1657   PetscCall(PetscFree(lrows));
1658 
1659   /* only change matrix nonzero state if pattern was allowed to be changed */
1660   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1661     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1662     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1663   }
1664   PetscFunctionReturn(PETSC_SUCCESS);
1665 }
1666 
1667 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1668 {
1669   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1670   PetscMPIInt        n, p = 0;
1671   PetscInt           i, j, k, r, len = 0, row, col, count;
1672   PetscInt          *lrows, *owners = A->rmap->range;
1673   PetscSFNode       *rrows;
1674   PetscSF            sf;
1675   const PetscScalar *xx;
1676   PetscScalar       *bb, *mask;
1677   Vec                xmask, lmask;
1678   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1679   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1680   PetscScalar       *aa;
1681 
1682   PetscFunctionBegin;
1683   PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1684   /* Create SF where leaves are input rows and roots are owned rows */
1685   PetscCall(PetscMalloc1(n, &lrows));
1686   for (r = 0; r < n; ++r) lrows[r] = -1;
1687   PetscCall(PetscMalloc1(N, &rrows));
1688   for (r = 0; r < N; ++r) {
1689     const PetscInt idx = rows[r];
1690     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1691     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1692       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1693     }
1694     rrows[r].rank  = p;
1695     rrows[r].index = rows[r] - owners[p];
1696   }
1697   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1698   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1699   /* Collect flags for rows to be zeroed */
1700   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1701   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1702   PetscCall(PetscSFDestroy(&sf));
1703   /* Compress and put in row numbers */
1704   for (r = 0; r < n; ++r)
1705     if (lrows[r] >= 0) lrows[len++] = r;
1706   /* zero diagonal part of matrix */
1707   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1708   /* handle off-diagonal part of matrix */
1709   PetscCall(MatCreateVecs(A, &xmask, NULL));
1710   PetscCall(VecDuplicate(l->lvec, &lmask));
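       /* xmask carries a 1.0 in every locally zeroed row; scattering it forward into lmask
          marks the ghost columns of B whose corresponding global rows/columns are zeroed */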
1711   PetscCall(VecGetArray(xmask, &bb));
1712   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1713   PetscCall(VecRestoreArray(xmask, &bb));
1714   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1715   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1716   PetscCall(VecDestroy(&xmask));
1717   if (x) {
1718     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1719     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1720     PetscCall(VecGetArrayRead(l->lvec, &xx));
1721     PetscCall(VecGetArray(b, &bb));
1722   }
1723   PetscCall(VecGetArray(lmask, &mask));
1724   /* remove zeroed rows of off-diagonal matrix */
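       /* blocks are stored column-major, so the entries of local row `row` sit at offset
          (row % bs) within each bs*bs block and are spaced bs apart; zero each of them */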
1725   for (i = 0; i < len; ++i) {
1726     row   = lrows[i];
1727     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1728     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1729     for (k = 0; k < count; ++k) {
1730       aa[0] = 0.0;
1731       aa += bs;
1732     }
1733   }
1734   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1735   for (i = 0; i < l->B->rmap->N; ++i) {
1736     row = i / bs;
1737     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1738       for (k = 0; k < bs; ++k) {
1739         col = bs * baij->j[j] + k;
1740         if (PetscAbsScalar(mask[col])) {
1741           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1742           if (x) bb[i] -= aa[0] * xx[col];
1743           aa[0] = 0.0;
1744         }
1745       }
1746     }
1747   }
1748   if (x) {
1749     PetscCall(VecRestoreArray(b, &bb));
1750     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1751   }
1752   PetscCall(VecRestoreArray(lmask, &mask));
1753   PetscCall(VecDestroy(&lmask));
1754   PetscCall(PetscFree(lrows));
1755 
1756   /* only change matrix nonzero state if pattern was allowed to be changed */
1757   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1758     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1759     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1760   }
1761   PetscFunctionReturn(PETSC_SUCCESS);
1762 }
1763 
1764 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatSetUnfactored(a->A));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1774 
1775 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1776 {
1777   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1778   Mat          a, b, c, d;
1779   PetscBool    flg;
1780 
1781   PetscFunctionBegin;
1782   a = matA->A;
1783   b = matA->B;
1784   c = matB->A;
1785   d = matB->B;
1786 
1787   PetscCall(MatEqual(a, c, &flg));
1788   if (flg) PetscCall(MatEqual(b, d, &flg));
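       /* the local blocks may compare equal on some processes and not on others, so the
          final answer is the logical AND over the communicator */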
1789   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1794 {
1795   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1796   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     PetscCall(MatCopy_Basic(A, B, str));
1802   } else {
1803     PetscCall(MatCopy(a->A, b->A, str));
1804     PetscCall(MatCopy(a->B, b->B, str));
1805   }
1806   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1811 {
1812   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1813   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1814   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
1821 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1822 {
1823   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1824   PetscBLASInt bnz, one                         = 1;
1825   Mat_SeqBAIJ *x, *y;
1826   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1827 
1828   PetscFunctionBegin;
1829   if (str == SAME_NONZERO_PATTERN) {
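         /* identical nonzero patterns: x->a and y->a are conformal arrays of nz*bs2 scalars,
            so Y += a*X reduces to one BLAS axpy over the diagonal part and one over the
            off-diagonal part */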
1830     PetscScalar alpha = a;
1831     x                 = (Mat_SeqBAIJ *)xx->A->data;
1832     y                 = (Mat_SeqBAIJ *)yy->A->data;
1833     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1834     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1835     x = (Mat_SeqBAIJ *)xx->B->data;
1836     y = (Mat_SeqBAIJ *)yy->B->data;
1837     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1838     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1839     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1840   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1841     PetscCall(MatAXPY_Basic(Y, a, X, str));
1842   } else {
1843     Mat       B;
1844     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1845     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1846     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1847     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1848     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1849     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1850     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1851     PetscCall(MatSetType(B, MATMPIBAIJ));
1852     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1853     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1854     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1855     /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
1856     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1857     PetscCall(MatHeaderMerge(Y, &B));
1858     PetscCall(PetscFree(nnz_d));
1859     PetscCall(PetscFree(nnz_o));
1860   }
1861   PetscFunctionReturn(PETSC_SUCCESS);
1862 }
1863 
1864 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1865 {
1866   PetscFunctionBegin;
1867   if (PetscDefined(USE_COMPLEX)) {
1868     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1869 
1870     PetscCall(MatConjugate_SeqBAIJ(a->A));
1871     PetscCall(MatConjugate_SeqBAIJ(a->B));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatRealPart(a->A));
1882   PetscCall(MatRealPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1887 {
1888   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatImaginaryPart(a->A));
1892   PetscCall(MatImaginaryPart(a->B));
1893   PetscFunctionReturn(PETSC_SUCCESS);
1894 }
1895 
1896 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1897 {
1898   IS       iscol_local;
1899   PetscInt csize;
1900 
1901   PetscFunctionBegin;
1902   PetscCall(ISGetLocalSize(iscol, &csize));
1903   if (call == MAT_REUSE_MATRIX) {
1904     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1905     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1906   } else {
1907     PetscCall(ISAllGather(iscol, &iscol_local));
1908   }
1909   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1910   if (call == MAT_INITIAL_MATRIX) {
1911     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1912     PetscCall(ISDestroy(&iscol_local));
1913   }
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 /*
1918   Not great since it makes two copies of the submatrix, first a SeqBAIJ
1919   locally, and then the end result by concatenating the local matrices.
1920   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1921   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1922 */
1923 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1924 {
1925   PetscMPIInt  rank, size;
1926   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1927   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1928   Mat          M, Mreuse;
1929   MatScalar   *vwork, *aa;
1930   MPI_Comm     comm;
1931   IS           isrow_new, iscol_new;
1932   Mat_SeqBAIJ *aij;
1933 
1934   PetscFunctionBegin;
1935   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1936   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1937   PetscCallMPI(MPI_Comm_size(comm, &size));
1938   /* The compression and expansion should be avoided. It does not flag
1939      errors and may change the indices, hence it is bug-prone */
1940   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1941   if (isrow == iscol) {
1942     iscol_new = isrow_new;
1943     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1944   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1945 
1946   if (call == MAT_REUSE_MATRIX) {
1947     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1948     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1949     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1950   } else {
1951     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1952   }
1953   PetscCall(ISDestroy(&isrow_new));
1954   PetscCall(ISDestroy(&iscol_new));
1955   /*
1956       m - number of local block rows
1957       n - number of block columns (same on all processors)
1958       rstart - first block row in the new global matrix generated
1959   */
1960   PetscCall(MatGetBlockSize(mat, &bs));
1961   PetscCall(MatGetSize(Mreuse, &m, &n));
1962   m = m / bs;
1963   n = n / bs;
1964 
1965   if (call == MAT_INITIAL_MATRIX) {
1966     aij = (Mat_SeqBAIJ *)Mreuse->data;
1967     ii  = aij->i;
1968     jj  = aij->j;
1969 
1970     /*
1971         Determine the number of non-zeros in the diagonal and off-diagonal
1972         portions of the matrix in order to do correct preallocation
1973     */
1974 
1975     /* first get start and end of "diagonal" columns */
1976     if (csize == PETSC_DECIDE) {
1977       PetscCall(ISGetSize(isrow, &mglobal));
1978       if (mglobal == n * bs) { /* square matrix */
1979         nlocal = m;
1980       } else {
1981         nlocal = n / size + ((n % size) > rank);
1982       }
1983     } else {
1984       nlocal = csize / bs;
1985     }
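         /* the inclusive prefix sum of nlocal over the ranks gives rend, one past this
            process's last "diagonal" block column; subtracting nlocal recovers rstart */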
1986     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1987     rstart = rend - nlocal;
1988     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1989 
1990     /* next, compute all the lengths */
1991     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1992     for (i = 0; i < m; i++) {
1993       jend = ii[i + 1] - ii[i];
1994       olen = 0;
1995       dlen = 0;
1996       for (j = 0; j < jend; j++) {
1997         if (*jj < rstart || *jj >= rend) olen++;
1998         else dlen++;
1999         jj++;
2000       }
2001       olens[i] = olen;
2002       dlens[i] = dlen;
2003     }
2004     PetscCall(MatCreate(comm, &M));
2005     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2006     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2007     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2008     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2009     PetscCall(PetscFree2(dlens, olens));
2010   } else {
2011     PetscInt ml, nl;
2012 
2013     M = *newmat;
2014     PetscCall(MatGetLocalSize(M, &ml, &nl));
2015     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2016     PetscCall(MatZeroEntries(M));
2017     /*
2018          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2019        rather than the slower MatSetValues().
2020     */
2021     M->was_assembled = PETSC_TRUE;
2022     M->assembled     = PETSC_FALSE;
2023   }
2024   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2025   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2026   aij = (Mat_SeqBAIJ *)Mreuse->data;
2027   ii  = aij->i;
2028   jj  = aij->j;
2029   aa  = aij->a;
2030   for (i = 0; i < m; i++) {
2031     row   = rstart / bs + i;
2032     nz    = ii[i + 1] - ii[i];
2033     cwork = jj;
2034     jj    = PetscSafePointerPlusOffset(jj, nz);
2035     vwork = aa;
2036     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2037     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2038   }
2039 
2040   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2041   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2042   *newmat = M;
2043 
2044   /* save submatrix used in processor for next request */
2045   if (call == MAT_INITIAL_MATRIX) {
2046     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2047     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2048   }
2049   PetscFunctionReturn(PETSC_SUCCESS);
2050 }
2051 
2052 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2053 {
2054   MPI_Comm        comm, pcomm;
2055   PetscInt        clocal_size, nrows;
2056   const PetscInt *rows;
2057   PetscMPIInt     size;
2058   IS              crowp, lcolp;
2059 
2060   PetscFunctionBegin;
2061   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2062   /* make a collective version of 'rowp' */
2063   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2064   if (pcomm == comm) {
2065     crowp = rowp;
2066   } else {
2067     PetscCall(ISGetSize(rowp, &nrows));
2068     PetscCall(ISGetIndices(rowp, &rows));
2069     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2070     PetscCall(ISRestoreIndices(rowp, &rows));
2071   }
2072   PetscCall(ISSetPermutation(crowp));
2073   /* make a local version of 'colp' */
2074   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2075   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2076   if (size == 1) {
2077     lcolp = colp;
2078   } else {
2079     PetscCall(ISAllGather(colp, &lcolp));
2080   }
2081   PetscCall(ISSetPermutation(lcolp));
2082   /* now we just get the submatrix */
2083   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2084   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2085   /* clean up */
2086   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2087   if (size > 1) PetscCall(ISDestroy(&lcolp));
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2092 {
2093   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2094   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2095 
2096   PetscFunctionBegin;
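       /* the ghosts are exactly the global block column indices of the off-diagonal part,
          which are already recorded in garray */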
2097   if (nghosts) *nghosts = B->nbs;
2098   if (ghosts) *ghosts = baij->garray;
2099   PetscFunctionReturn(PETSC_SUCCESS);
2100 }
2101 
2102 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2103 {
2104   Mat          B;
2105   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2106   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2107   Mat_SeqAIJ  *b;
2108   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2109   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2110   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2111 
2112   PetscFunctionBegin;
2113   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2114   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2115 
2116   /* Tell every processor the number of nonzero blocks per block row */
2117   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
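       /* each local block row's length is its number of nonzero blocks in the diagonal (ad)
          part plus the off-diagonal (bd) part; the in-place Allgatherv below fills in the
          lengths owned by the other processes */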
2118   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2119   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2120   displs = recvcounts + size;
2121   for (i = 0; i < size; i++) {
2122     PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2123     PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2124   }
2125   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2126   /* Create a sequential AIJ matrix that will hold the global block nonzero structure (one entry per block) */
2127   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2128   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2129   PetscCall(MatSetType(B, MATSEQAIJ));
2130   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2131   b = (Mat_SeqAIJ *)B->data;
2132 
2133   /* Copy my part of the matrix column indices over */
2134   sendcount  = ad->nz + bd->nz;
2135   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2136   a_jsendbuf = ad->j;
2137   b_jsendbuf = bd->j;
2138   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2139   cnt        = 0;
2140   for (i = 0; i < n; i++) {
2141     /* put in lower diagonal portion */
2142     m = bd->i[i + 1] - bd->i[i];
2143     while (m > 0) {
2144       /* is it above diagonal (in bd (compressed) numbering) */
2145       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2146       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2147       m--;
2148     }
2149 
2150     /* put in diagonal portion */
2151     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2152 
2153     /* put in upper diagonal portion */
2154     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2155   }
2156   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2157 
2158   /* Gather all column indices to all processors */
2159   for (i = 0; i < size; i++) {
2160     recvcounts[i] = 0;
2161     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2162   }
2163   displs[0] = 0;
2164   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2165   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2166   /* Assemble the matrix into usable form (note: numerical values are not yet set) */
2167   /* set the b->ilen (length of each row) values */
2168   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2169   /* set the b->i indices */
2170   b->i[0] = 0;
2171   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2172   PetscCall(PetscFree(lens));
2173   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2174   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2175   PetscCall(PetscFree(recvcounts));
2176 
2177   PetscCall(MatPropagateSymmetryOptions(A, B));
2178   *newmat = B;
2179   PetscFunctionReturn(PETSC_SUCCESS);
2180 }
2181 
2182 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2183 {
2184   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2185   Vec          bb1 = NULL;
2186 
2187   PetscFunctionBegin;
2188   if (flag == SOR_APPLY_UPPER) {
2189     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2190     PetscFunctionReturn(PETSC_SUCCESS);
2191   }
2192 
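       /* bb1 holds the corrected right-hand side bb - B*x; it is needed whenever an existing
          iterate enters the sweep, i.e., for a nonzero initial guess or more than one
          outer iteration */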
2193   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2194 
2195   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2196     if (flag & SOR_ZERO_INITIAL_GUESS) {
2197       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2198       its--;
2199     }
2200 
2201     while (its--) {
2202       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2203       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2204 
2205       /* update rhs: bb1 = bb - B*x */
2206       PetscCall(VecScale(mat->lvec, -1.0));
2207       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2208 
2209       /* local sweep */
2210       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2211     }
2212   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2213     if (flag & SOR_ZERO_INITIAL_GUESS) {
2214       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2215       its--;
2216     }
2217     while (its--) {
2218       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2219       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2220 
2221       /* update rhs: bb1 = bb - B*x */
2222       PetscCall(VecScale(mat->lvec, -1.0));
2223       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2224 
2225       /* local sweep */
2226       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2227     }
2228   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2229     if (flag & SOR_ZERO_INITIAL_GUESS) {
2230       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2231       its--;
2232     }
2233     while (its--) {
2234       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2235       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2236 
2237       /* update rhs: bb1 = bb - B*x */
2238       PetscCall(VecScale(mat->lvec, -1.0));
2239       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2240 
2241       /* local sweep */
2242       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2243     }
2244   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR variant is not supported");
2245 
2246   PetscCall(VecDestroy(&bb1));
2247   PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249 
2250 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2251 {
2252   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2253   PetscInt     m, N, i, *garray = aij->garray;
2254   PetscInt     ib, jb, bs = A->rmap->bs;
2255   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2256   MatScalar   *a_val = a_aij->a;
2257   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2258   MatScalar   *b_val = b_aij->a;
2259   PetscReal   *work;
2260   PetscMPIInt  iN;
2261 
2262   PetscFunctionBegin;
2263   PetscCall(MatGetSize(A, &m, &N));
2264   PetscCall(PetscCalloc1(N, &work));
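       /* work accumulates this process's partial reduction for every global column: the
          diagonal (A) part contributes at columns offset by cmap->rstart, the off-diagonal
          (B) part at the global block columns recorded in garray; an Allreduce combines the
          partial results below */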
2265   if (type == NORM_2) {
2266     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2267       for (jb = 0; jb < bs; jb++) {
2268         for (ib = 0; ib < bs; ib++) {
2269           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2270           a_val++;
2271         }
2272       }
2273     }
2274     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2278           b_val++;
2279         }
2280       }
2281     }
2282   } else if (type == NORM_1) {
2283     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2284       for (jb = 0; jb < bs; jb++) {
2285         for (ib = 0; ib < bs; ib++) {
2286           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2287           a_val++;
2288         }
2289       }
2290     }
2291     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2295           b_val++;
2296         }
2297       }
2298     }
2299   } else if (type == NORM_INFINITY) {
2300     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2301       for (jb = 0; jb < bs; jb++) {
2302         for (ib = 0; ib < bs; ib++) {
2303           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2304           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2305           a_val++;
2306         }
2307       }
2308     }
2309     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2310       for (jb = 0; jb < bs; jb++) {
2311         for (ib = 0; ib < bs; ib++) {
2312           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2313           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2314           b_val++;
2315         }
2316       }
2317     }
2318   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2319     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2320       for (jb = 0; jb < bs; jb++) {
2321         for (ib = 0; ib < bs; ib++) {
2322           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2323           a_val++;
2324         }
2325       }
2326     }
2327     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2331           b_val++;
2332         }
2333       }
2334     }
2335   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2336     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2337       for (jb = 0; jb < bs; jb++) {
2338         for (ib = 0; ib < bs; ib++) {
2339           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2340           a_val++;
2341         }
2342       }
2343     }
2344     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2348           b_val++;
2349         }
2350       }
2351     }
2352   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2353   PetscCall(PetscMPIIntCast(N, &iN));
2354   if (type == NORM_INFINITY) {
2355     PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2356   } else {
2357     PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2358   }
2359   PetscCall(PetscFree(work));
2360   if (type == NORM_2) {
2361     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2362   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2363     for (i = 0; i < N; i++) reductions[i] /= m;
2364   }
2365   PetscFunctionReturn(PETSC_SUCCESS);
2366 }
2367 
2368 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2369 {
2370   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2371 
2372   PetscFunctionBegin;
2373   PetscCall(MatInvertBlockDiagonal(a->A, values));
2374   A->factorerrortype             = a->A->factorerrortype;
2375   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2376   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2377   PetscFunctionReturn(PETSC_SUCCESS);
2378 }
2379 
2380 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2381 {
2382   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2383   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2384 
2385   PetscFunctionBegin;
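       /* make sure the diagonal part has at least one allocated block per block row before
          MatShift_Basic() inserts the shift, so the insertions cannot trigger an
          allocation error */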
2386   if (!Y->preallocated) {
2387     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2388   } else if (!aij->nz) {
2389     PetscInt nonew = aij->nonew;
2390     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2391     aij->nonew = nonew;
2392   }
2393   PetscCall(MatShift_Basic(Y, a));
2394   PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396 
2397 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2398 {
2399   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2400 
2401   PetscFunctionBegin;
2402   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2403   PetscCall(MatMissingDiagonal(a->A, missing, d));
2404   if (d) {
2405     PetscInt rstart;
2406     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
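         /* convert the local block row index reported by the sequential diagonal block into
            a global block row index */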
2407     *d += rstart / A->rmap->bs;
2408   }
2409   PetscFunctionReturn(PETSC_SUCCESS);
2410 }
2411 
2412 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2413 {
2414   PetscFunctionBegin;
2415   *a = ((Mat_MPIBAIJ *)A->data)->A;
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
2419 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2420 {
2421   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2422 
2423   PetscFunctionBegin;
2424   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2425   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2426   PetscFunctionReturn(PETSC_SUCCESS);
2427 }
2428 
2429 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2430                                        MatGetRow_MPIBAIJ,
2431                                        MatRestoreRow_MPIBAIJ,
2432                                        MatMult_MPIBAIJ,
2433                                        /* 4*/ MatMultAdd_MPIBAIJ,
2434                                        MatMultTranspose_MPIBAIJ,
2435                                        MatMultTransposeAdd_MPIBAIJ,
2436                                        NULL,
2437                                        NULL,
2438                                        NULL,
2439                                        /*10*/ NULL,
2440                                        NULL,
2441                                        NULL,
2442                                        MatSOR_MPIBAIJ,
2443                                        MatTranspose_MPIBAIJ,
2444                                        /*15*/ MatGetInfo_MPIBAIJ,
2445                                        MatEqual_MPIBAIJ,
2446                                        MatGetDiagonal_MPIBAIJ,
2447                                        MatDiagonalScale_MPIBAIJ,
2448                                        MatNorm_MPIBAIJ,
2449                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2450                                        MatAssemblyEnd_MPIBAIJ,
2451                                        MatSetOption_MPIBAIJ,
2452                                        MatZeroEntries_MPIBAIJ,
2453                                        /*24*/ MatZeroRows_MPIBAIJ,
2454                                        NULL,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                        /*29*/ MatSetUp_MPI_Hash,
2459                                        NULL,
2460                                        NULL,
2461                                        MatGetDiagonalBlock_MPIBAIJ,
2462                                        NULL,
2463                                        /*34*/ MatDuplicate_MPIBAIJ,
2464                                        NULL,
2465                                        NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        /*39*/ MatAXPY_MPIBAIJ,
2469                                        MatCreateSubMatrices_MPIBAIJ,
2470                                        MatIncreaseOverlap_MPIBAIJ,
2471                                        MatGetValues_MPIBAIJ,
2472                                        MatCopy_MPIBAIJ,
2473                                        /*44*/ NULL,
2474                                        MatScale_MPIBAIJ,
2475                                        MatShift_MPIBAIJ,
2476                                        NULL,
2477                                        MatZeroRowsColumns_MPIBAIJ,
2478                                        /*49*/ NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2484                                        NULL,
2485                                        MatSetUnfactored_MPIBAIJ,
2486                                        MatPermute_MPIBAIJ,
2487                                        MatSetValuesBlocked_MPIBAIJ,
2488                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2489                                        MatDestroy_MPIBAIJ,
2490                                        MatView_MPIBAIJ,
2491                                        NULL,
2492                                        NULL,
2493                                        /*64*/ NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        /*74*/ NULL,
2504                                        MatFDColoringApply_BAIJ,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        /*79*/ NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        MatLoad_MPIBAIJ,
2513                                        /*84*/ NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        /*89*/ NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        /*94*/ NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        /*99*/ NULL,
2529                                        NULL,
2530                                        NULL,
2531                                        MatConjugate_MPIBAIJ,
2532                                        NULL,
2533                                        /*104*/ NULL,
2534                                        MatRealPart_MPIBAIJ,
2535                                        MatImaginaryPart_MPIBAIJ,
2536                                        NULL,
2537                                        NULL,
2538                                        /*109*/ NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        MatMissingDiagonal_MPIBAIJ,
2543                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2544                                        NULL,
2545                                        MatGetGhosts_MPIBAIJ,
2546                                        NULL,
2547                                        NULL,
2548                                        /*119*/ NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        NULL,
2552                                        MatGetMultiProcBlock_MPIBAIJ,
2553                                        /*124*/ NULL,
2554                                        MatGetColumnReductions_MPIBAIJ,
2555                                        MatInvertBlockDiagonal_MPIBAIJ,
2556                                        NULL,
2557                                        NULL,
2558                                        /*129*/ NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        /*134*/ NULL,
2564                                        NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                        /*139*/ MatSetBlockSizes_Default,
2569                                        NULL,
2570                                        NULL,
2571                                        MatFDColoringSetUp_MPIXAIJ,
2572                                        NULL,
2573                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2574                                        NULL,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL,
2578                                        NULL,
2579                                        /*150*/ NULL,
2580                                        MatEliminateZeros_MPIBAIJ,
2581                                        MatGetRowSumAbs_MPIBAIJ,
2582                                        NULL,
2583                                        NULL,
2584                                        NULL};
2585 
2586 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2587 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2588 
2589 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2590 {
2591   PetscInt        m, rstart, cstart, cend;
2592   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2593   const PetscInt *JJ          = NULL;
2594   PetscScalar    *values      = NULL;
2595   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2596   PetscBool       nooffprocentries;
2597 
2598   PetscFunctionBegin;
2599   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2600   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2601   PetscCall(PetscLayoutSetUp(B->rmap));
2602   PetscCall(PetscLayoutSetUp(B->cmap));
2603   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2604   m      = B->rmap->n / bs;
2605   rstart = B->rmap->rstart / bs;
2606   cstart = B->cmap->rstart / bs;
2607   cend   = B->cmap->rend / bs;
2608 
2609   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2610   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2611   for (i = 0; i < m; i++) {
2612     nz = ii[i + 1] - ii[i];
2613     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2614     nz_max = PetscMax(nz_max, nz);
2615     dlen   = 0;
2616     olen   = 0;
2617     JJ     = jj + ii[i];
2618     for (j = 0; j < nz; j++) {
2619       if (*JJ < cstart || *JJ >= cend) olen++;
2620       else dlen++;
2621       JJ++;
2622     }
2623     d_nnz[i] = dlen;
2624     o_nnz[i] = olen;
2625   }
2626   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2627   PetscCall(PetscFree2(d_nnz, o_nnz));
2628 
2629   values = (PetscScalar *)V;
2630   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2631   for (i = 0; i < m; i++) {
2632     PetscInt        row   = i + rstart;
2633     PetscInt        ncols = ii[i + 1] - ii[i];
2634     const PetscInt *icols = jj + ii[i];
2635     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2636       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2637       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2638     } else { /* block ordering does not match so we can only insert one block at a time. */
2639       PetscInt j;
2640       for (j = 0; j < ncols; j++) {
2641         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2642         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2643       }
2644     }
2645   }
2646 
2647   if (!V) PetscCall(PetscFree(values));
2648   nooffprocentries    = B->nooffprocentries;
2649   B->nooffprocentries = PETSC_TRUE;
2650   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2651   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2652   B->nooffprocentries = nooffprocentries;
2653 
2654   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2655   PetscFunctionReturn(PETSC_SUCCESS);
2656 }
2657 
2658 /*@C
2659   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2660 
2661   Collective
2662 
2663   Input Parameters:
2664 + B  - the matrix
2665 . bs - the block size
2666 . i  - the indices into `j` for the start of each local row (starts with zero)
2667 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2668 - v  - optional values in the matrix, use `NULL` if not provided
2669 
2670   Level: advanced
2671 
2672   Notes:
2673   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2674   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2675   called this routine.
2676 
2677   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2678   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2679   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2680   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2681   block and the second index is over columns within a block.
2682 
2683   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2684 
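  Example Usage:
  A minimal sketch (the block size, the communicator `comm`, and the CSR arrays below are illustrative
  assumptions, not taken from this routine) for a process that owns two block rows; pass an array of
  bs*bs-sized blocks instead of `NULL` to also set numerical values:

.vb
   Mat      B;
   PetscInt bs  = 2;
   PetscInt i[] = {0, 2, 3}; /* block row 0 holds two blocks, block row 1 holds one block */
   PetscInt j[] = {0, 1, 1}; /* sorted global block column indices */

   PetscCall(MatCreate(comm, &B));
   PetscCall(MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, i, j, NULL));
.ve
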
2685 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2686 @*/
2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2688 {
2689   PetscFunctionBegin;
2690   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2691   PetscValidType(B, 1);
2692   PetscValidLogicalCollectiveInt(B, bs, 2);
2693   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2694   PetscFunctionReturn(PETSC_SUCCESS);
2695 }
2696 
2697 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2698 {
2699   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2700   PetscInt     i;
2701   PetscMPIInt  size;
2702 
2703   PetscFunctionBegin;
2704   if (B->hash_active) {
2705     B->ops[0]      = b->cops;
2706     B->hash_active = PETSC_FALSE;
2707   }
2708   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2709   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2710   PetscCall(PetscLayoutSetUp(B->rmap));
2711   PetscCall(PetscLayoutSetUp(B->cmap));
2712   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2713 
2714   if (d_nnz) {
2715     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2716   }
2717   if (o_nnz) {
2718     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2719   }
2720 
2721   b->bs2 = bs * bs;
2722   b->mbs = B->rmap->n / bs;
2723   b->nbs = B->cmap->n / bs;
2724   b->Mbs = B->rmap->N / bs;
2725   b->Nbs = B->cmap->N / bs;
2726 
2727   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2728   b->rstartbs = B->rmap->rstart / bs;
2729   b->rendbs   = B->rmap->rend / bs;
2730   b->cstartbs = B->cmap->rstart / bs;
2731   b->cendbs   = B->cmap->rend / bs;
2732 
2733 #if defined(PETSC_USE_CTABLE)
2734   PetscCall(PetscHMapIDestroy(&b->colmap));
2735 #else
2736   PetscCall(PetscFree(b->colmap));
2737 #endif
2738   PetscCall(PetscFree(b->garray));
2739   PetscCall(VecDestroy(&b->lvec));
2740   PetscCall(VecScatterDestroy(&b->Mvctx));
2741 
2742   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2743 
2744   MatSeqXAIJGetOptions_Private(b->B);
2745   PetscCall(MatDestroy(&b->B));
2746   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2747   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2748   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2749   MatSeqXAIJRestoreOptions_Private(b->B);
2750 
2751   MatSeqXAIJGetOptions_Private(b->A);
2752   PetscCall(MatDestroy(&b->A));
2753   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2754   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2755   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2756   MatSeqXAIJRestoreOptions_Private(b->A);
2757 
2758   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2759   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2760   B->preallocated  = PETSC_TRUE;
2761   B->was_assembled = PETSC_FALSE;
2762   B->assembled     = PETSC_FALSE;
2763   PetscFunctionReturn(PETSC_SUCCESS);
2764 }
2765 
2766 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2767 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2768 
2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2770 {
2771   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2772   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2773   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2774   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2775 
2776   PetscFunctionBegin;
2777   PetscCall(PetscMalloc1(M + 1, &ii));
2778   ii[0] = 0;
2779   for (i = 0; i < M; i++) {
2780     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2781     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2782     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2783     /* remove one from count if matrix has diagonal */
2784     for (j = id[i]; j < id[i + 1]; j++) {
2785       if (jd[j] == i) {
2786         ii[i + 1]--;
2787         break;
2788       }
2789     }
2790   }
2791   PetscCall(PetscMalloc1(ii[M], &jj));
2792   cnt = 0;
2793   for (i = 0; i < M; i++) {
2794     for (j = io[i]; j < io[i + 1]; j++) {
2795       if (garray[jo[j]] > rstart) break;
2796       jj[cnt++] = garray[jo[j]];
2797     }
2798     for (k = id[i]; k < id[i + 1]; k++) {
2799       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2800     }
2801     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2802   }
2803   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2804   PetscFunctionReturn(PETSC_SUCCESS);
2805 }
2806 
2807 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2808 
2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2810 
2811 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2812 {
2813   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2814   Mat_MPIAIJ  *b;
2815   Mat          B;
2816 
2817   PetscFunctionBegin;
2818   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2819 
2820   if (reuse == MAT_REUSE_MATRIX) {
2821     B = *newmat;
2822   } else {
2823     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2824     PetscCall(MatSetType(B, MATMPIAIJ));
2825     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2826     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2827     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2828     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2829   }
2830   b = (Mat_MPIAIJ *)B->data;
2831 
2832   if (reuse == MAT_REUSE_MATRIX) {
2833     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2834     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2835   } else {
2836     PetscInt   *garray = a->garray;
2837     Mat_SeqAIJ *bB;
2838     PetscInt    bs, nnz;
2839     PetscCall(MatDestroy(&b->A));
2840     PetscCall(MatDestroy(&b->B));
2841     /* just clear out the data structure */
2842     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2843     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2844     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2845 
2846     /* Global numbering for b->B columns */
2847     bB  = (Mat_SeqAIJ *)b->B->data;
2848     bs  = A->rmap->bs;
2849     nnz = bB->i[A->rmap->n];
2850     for (PetscInt k = 0; k < nnz; k++) {
2851       PetscInt bj = bB->j[k] / bs;
2852       PetscInt br = bB->j[k] % bs;
2853       bB->j[k]    = garray[bj] * bs + br;
2854     }
2855   }
2856   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2857   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2858 
2859   if (reuse == MAT_INPLACE_MATRIX) {
2860     PetscCall(MatHeaderReplace(A, &B));
2861   } else {
2862     *newmat = B;
2863   }
2864   PetscFunctionReturn(PETSC_SUCCESS);
2865 }
2866 
2867 /*MC
2868    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2869 
2870    Options Database Keys:
2871 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2872 . -mat_block_size <bs> - set the blocksize used to store the matrix
2873 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2874 - -mat_use_hash_table <fact> - set hash table factor
2875 
2876    Level: beginner
2877 
2878    Note:
2879     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2880     space is allocated for the nonzero entries, and any entries passed with `MatSetValues()` are ignored.
2881 
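   Example Usage:
   A minimal sketch (the global sizes M and N are illustrative assumptions) of selecting this type at runtime,
   e.g. by running with -mat_type mpibaij -mat_block_size 3 on the command line:

.vb
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetFromOptions(A));
.ve
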
2882 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2883 M*/
2884 
2885 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2886 
2887 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2888 {
2889   Mat_MPIBAIJ *b;
2890   PetscBool    flg = PETSC_FALSE;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(PetscNew(&b));
2894   B->data      = (void *)b;
2895   B->ops[0]    = MatOps_Values;
2896   B->assembled = PETSC_FALSE;
2897 
2898   B->insertmode = NOT_SET_VALUES;
2899   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2900   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2901 
2902   /* build local table of row and column ownerships */
2903   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2904 
2905   /* build cache for off-process entries formed during assembly */
2906   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2907 
2908   b->donotstash  = PETSC_FALSE;
2909   b->colmap      = NULL;
2910   b->garray      = NULL;
2911   b->roworiented = PETSC_TRUE;
2912 
2913   /* stuff used in block assembly */
2914   b->barray = NULL;
2915 
2916   /* stuff used for matrix vector multiply */
2917   b->lvec  = NULL;
2918   b->Mvctx = NULL;
2919 
2920   /* stuff for MatGetRow() */
2921   b->rowindices   = NULL;
2922   b->rowvalues    = NULL;
2923   b->getrowactive = PETSC_FALSE;
2924 
2925   /* hash table stuff */
2926   b->ht           = NULL;
2927   b->hd           = NULL;
2928   b->ht_size      = 0;
2929   b->ht_flag      = PETSC_FALSE;
2930   b->ht_fact      = 0;
2931   b->ht_total_ct  = 0;
2932   b->ht_insert_ct = 0;
2933 
2934   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2935   b->ijonly = PETSC_FALSE;
2936 
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2940 #if defined(PETSC_HAVE_HYPRE)
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2942 #endif
2943   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2946   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2947   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2948   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2949   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2950   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2951 
2952   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2953   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2954   if (flg) {
2955     PetscReal fact = 1.39;
2956     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2957     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2958     if (fact <= 1.0) fact = 1.39;
2959     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2960     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2961   }
2962   PetscOptionsEnd();
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2967 /*MC
2968    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2969 
2970    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2971    and `MATMPIBAIJ` otherwise.
2972 
2973    Options Database Keys:
2974 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2975 
2976   Level: beginner
2977 
2978 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2979 M*/
2980 
2981 /*@
2982   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2983   (block compressed row).
2984 
2985   Collective
2986 
2987   Input Parameters:
2988 + B     - the matrix
2989 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2990           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2991 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2992            submatrix  (same for all local rows)
2993 . d_nnz - array containing the number of block nonzeros in the various block rows
2994            of the diagonal portion of the local submatrix (possibly different for each block
2995            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2996            set it even if it is zero.
2997 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2998            submatrix (same for all local rows).
2999 - o_nnz - array containing the number of block nonzeros in the various block rows of the
3000            off-diagonal portion of the local submatrix (possibly different for
3001            each block row) or `NULL`.
3002 
3003    If the *_nnz parameter is given then the *_nz parameter is ignored
3004 
3005   Options Database Keys:
3006 + -mat_block_size            - size of the blocks to use
3007 - -mat_use_hash_table <fact> - set hash table factor
3008 
3009   Level: intermediate
3010 
3011   Notes:
3012   For good matrix assembly performance
3013   the user should preallocate the matrix storage by setting the parameters
3014   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3015   performance can be increased by more than a factor of 50.
3016 
3017   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3018   then it must be used on all processors that share the object for that argument.
3019 
3020   Storage Information:
3021   For a square global matrix we define each processor's diagonal portion
3022   to be its local rows and the corresponding columns (a square submatrix);
3023   each processor's off-diagonal portion encompasses the remainder of the
3024   local matrix (a rectangular submatrix).
3025 
3026   The user can specify preallocated storage for the diagonal part of
3027   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3028   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3029   memory allocation.  Likewise, specify preallocated storage for the
3030   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3031 
3032   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3033   the figure below we depict these three local rows and all columns (0-11).
3034 
3035 .vb
3036            0 1 2 3 4 5 6 7 8 9 10 11
3037           --------------------------
3038    row 3  |o o o d d d o o o o  o  o
3039    row 4  |o o o d d d o o o o  o  o
3040    row 5  |o o o d d d o o o o  o  o
3041           --------------------------
3042 .ve
3043 
3044   Thus, any entries in the d locations are stored in the d (diagonal)
3045   submatrix, and any entries in the o locations are stored in the
3046   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3047   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3048 
3049   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3050   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3051   In general, for PDE problems in which most nonzeros are near the diagonal,
3052   one expects `d_nz` >> `o_nz`.
3053 
3054   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3055   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3056   You can also run with the option `-info` and look for messages with the string
3057   malloc in them to see if additional memory allocation was needed.
3058 
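  Example Usage:
  A minimal sketch (the communicator comm, the local size m, which must be divisible by the block size, and the
  nonzero estimates are illustrative assumptions) reserving 5 diagonal and 2 off-diagonal block nonzeros per block row:

.vb
   PetscCall(MatCreate(comm, &B));
   PetscCall(MatSetSizes(B, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocation(B, 2, 5, NULL, 2, NULL));
.ve
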
3059 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3060 @*/
3061 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3062 {
3063   PetscFunctionBegin;
3064   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3065   PetscValidType(B, 1);
3066   PetscValidLogicalCollectiveInt(B, bs, 2);
3067   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3068   PetscFunctionReturn(PETSC_SUCCESS);
3069 }
3070 
3071 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3072 /*@
3073   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3074   (block compressed row).
3075 
3076   Collective
3077 
3078   Input Parameters:
3079 + comm  - MPI communicator
3080 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
3081           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3082 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3083           This value should be the same as the local size used in creating the
3084           y vector for the matrix-vector product y = Ax.
3085 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3086           This value should be the same as the local size used in creating the
3087           x vector for the matrix-vector product y = Ax.
3088 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
3089 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
3090 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3091           submatrix  (same for all local rows)
3092 . d_nnz - array containing the number of nonzero blocks in the various block rows
3093           of the diagonal portion of the local submatrix (possibly different for each block
3094           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3095           and set it even if it is zero.
3096 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3097           submatrix (same for all local rows).
3098 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3099           off-diagonal portion of the local submatrix (possibly different for
3100           each block row) or NULL.
3101 
3102   Output Parameter:
3103 . A - the matrix
3104 
3105   Options Database Keys:
3106 + -mat_block_size            - size of the blocks to use
3107 - -mat_use_hash_table <fact> - set hash table factor
3108 
3109   Level: intermediate
3110 
3111   Notes:
3112   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3113   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3114   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3115 
3116   For good matrix assembly performance
3117   the user should preallocate the matrix storage by setting the parameters
3118   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3119   performance can be increased by more than a factor of 50.
3120 
3121   If the *_nnz parameter is given then the *_nz parameter is ignored
3122 
3123   A nonzero block is any block that has 1 or more nonzeros in it
3124 
3125   The user MUST specify either the local or global matrix dimensions
3126   (possibly both).
3127 
3128   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3129   then it must be used on all processors that share the object for that argument.
3130 
3131   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3132   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3133 
3134   Storage Information:
3135   For a square global matrix we define each processor's diagonal portion
3136   to be its local rows and the corresponding columns (a square submatrix);
3137   each processor's off-diagonal portion encompasses the remainder of the
3138   local matrix (a rectangular submatrix).
3139 
3140   The user can specify preallocated storage for the diagonal part of
3141   the local submatrix with either d_nz or d_nnz (not both).  Set
3142   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3143   memory allocation.  Likewise, specify preallocated storage for the
3144   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3145 
3146   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3147   the figure below we depict these three local rows and all columns (0-11).
3148 
3149 .vb
3150            0 1 2 3 4 5 6 7 8 9 10 11
3151           --------------------------
3152    row 3  |o o o d d d o o o o  o  o
3153    row 4  |o o o d d d o o o o  o  o
3154    row 5  |o o o d d d o o o o  o  o
3155           --------------------------
3156 .ve
3157 
3158   Thus, any entries in the d locations are stored in the d (diagonal)
3159   submatrix, and any entries in the o locations are stored in the
3160   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3161   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3162 
3163   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3164   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3165   In general, for PDE problems in which most nonzeros are near the diagonal,
3166   one expects `d_nz` >> `o_nz`.
3167 
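  Example Usage:
  A minimal sketch (the global size M, which must be divisible by the block size, and the nonzero estimates are
  illustrative assumptions) creating a square matrix with 3x3 blocks and a PETSc-chosen row distribution:

.vb
   PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 3, PETSC_DECIDE, PETSC_DECIDE, M, M, 5, NULL, 2, NULL, &A));
.ve
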
3168 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3169           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3170 @*/
3171 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3172 {
3173   PetscMPIInt size;
3174 
3175   PetscFunctionBegin;
3176   PetscCall(MatCreate(comm, A));
3177   PetscCall(MatSetSizes(*A, m, n, M, N));
3178   PetscCallMPI(MPI_Comm_size(comm, &size));
3179   if (size > 1) {
3180     PetscCall(MatSetType(*A, MATMPIBAIJ));
3181     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3182   } else {
3183     PetscCall(MatSetType(*A, MATSEQBAIJ));
3184     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3185   }
3186   PetscFunctionReturn(PETSC_SUCCESS);
3187 }
3188 
3189 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3190 {
3191   Mat          mat;
3192   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3193   PetscInt     len = 0;
3194 
3195   PetscFunctionBegin;
3196   *newmat = NULL;
3197   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3198   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3199   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3200 
3201   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3202   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3203   if (matin->hash_active) {
3204     PetscCall(MatSetUp(mat));
3205   } else {
3206     mat->factortype   = matin->factortype;
3207     mat->preallocated = PETSC_TRUE;
3208     mat->assembled    = PETSC_TRUE;
3209     mat->insertmode   = NOT_SET_VALUES;
3210 
3211     a             = (Mat_MPIBAIJ *)mat->data;
3212     mat->rmap->bs = matin->rmap->bs;
3213     a->bs2        = oldmat->bs2;
3214     a->mbs        = oldmat->mbs;
3215     a->nbs        = oldmat->nbs;
3216     a->Mbs        = oldmat->Mbs;
3217     a->Nbs        = oldmat->Nbs;
3218 
3219     a->size         = oldmat->size;
3220     a->rank         = oldmat->rank;
3221     a->donotstash   = oldmat->donotstash;
3222     a->roworiented  = oldmat->roworiented;
3223     a->rowindices   = NULL;
3224     a->rowvalues    = NULL;
3225     a->getrowactive = PETSC_FALSE;
3226     a->barray       = NULL;
3227     a->rstartbs     = oldmat->rstartbs;
3228     a->rendbs       = oldmat->rendbs;
3229     a->cstartbs     = oldmat->cstartbs;
3230     a->cendbs       = oldmat->cendbs;
3231 
3232     /* hash table stuff */
3233     a->ht           = NULL;
3234     a->hd           = NULL;
3235     a->ht_size      = 0;
3236     a->ht_flag      = oldmat->ht_flag;
3237     a->ht_fact      = oldmat->ht_fact;
3238     a->ht_total_ct  = 0;
3239     a->ht_insert_ct = 0;
3240 
3241     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3242     if (oldmat->colmap) {
3243 #if defined(PETSC_USE_CTABLE)
3244       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3245 #else
3246       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3247       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3248 #endif
3249     } else a->colmap = NULL;
3250 
3251     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3252       PetscCall(PetscMalloc1(len, &a->garray));
3253       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3254     } else a->garray = NULL;
3255 
3256     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3257     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3258     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3259 
3260     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3261     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3262   }
3263   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3264   *newmat = mat;
3265   PetscFunctionReturn(PETSC_SUCCESS);
3266 }
3267 
3268 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3269 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3270 {
3271   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3272   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3273   PetscScalar *matvals;
3274 
3275   PetscFunctionBegin;
3276   PetscCall(PetscViewerSetUp(viewer));
3277 
3278   /* read in matrix header */
3279   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3280   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3281   M  = header[1];
3282   N  = header[2];
3283   nz = header[3];
3284   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3285   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3286   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3287 
3288   /* set block sizes from the viewer's .info file */
3289   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3290   /* set local sizes if not set already */
3291   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3292   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3293   /* set global sizes if not set already */
3294   if (mat->rmap->N < 0) mat->rmap->N = M;
3295   if (mat->cmap->N < 0) mat->cmap->N = N;
3296   PetscCall(PetscLayoutSetUp(mat->rmap));
3297   PetscCall(PetscLayoutSetUp(mat->cmap));
3298 
3299   /* check if the matrix sizes are correct */
3300   PetscCall(MatGetSize(mat, &rows, &cols));
3301   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3302   PetscCall(MatGetBlockSize(mat, &bs));
3303   PetscCall(MatGetLocalSize(mat, &m, &n));
3304   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3305   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3306   mbs = m / bs;
3307   nbs = n / bs;
3308 
3309   /* read in row lengths and build row indices */
3310   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3311   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3312   rowidxs[0] = 0;
3313   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3314   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3315   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3316 
3317   /* read in column indices and matrix values */
3318   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3319   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3320   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3321 
3322   {                /* preallocate matrix storage */
3323     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3324     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3325     PetscBool  sbaij, done;
3326     PetscInt  *d_nnz, *o_nnz;
3327 
3328     PetscCall(PetscBTCreate(nbs, &bt));
3329     PetscCall(PetscHSetICreate(&ht));
3330     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3331     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3332     for (i = 0; i < mbs; i++) {
3333       PetscCall(PetscBTMemzero(nbs, bt));
3334       PetscCall(PetscHSetIClear(ht));
3335       for (k = 0; k < bs; k++) {
3336         PetscInt row = bs * i + k;
3337         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3338           PetscInt col = colidxs[j];
3339           if (!sbaij || col >= row) {
3340             if (col >= cs && col < ce) {
3341               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3342             } else {
3343               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3344               if (done) o_nnz[i]++;
3345             }
3346           }
3347         }
3348       }
3349     }
3350     PetscCall(PetscBTDestroy(&bt));
3351     PetscCall(PetscHSetIDestroy(&ht));
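    /* both preallocation routines below are called; each dispatches through PetscTryMethod(), so only the one matching the actual matrix type has an effect */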
3352     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3353     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3354     PetscCall(PetscFree2(d_nnz, o_nnz));
3355   }
3356 
3357   /* store matrix values */
3358   for (i = 0; i < m; i++) {
3359     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3360     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3361   }
3362 
3363   PetscCall(PetscFree(rowidxs));
3364   PetscCall(PetscFree2(colidxs, matvals));
3365   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3366   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3367   PetscFunctionReturn(PETSC_SUCCESS);
3368 }
3369 
3370 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3371 {
3372   PetscBool isbinary;
3373 
3374   PetscFunctionBegin;
3375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3376   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3377   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3378   PetscFunctionReturn(PETSC_SUCCESS);
3379 }
3380 
3381 /*@
3382   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3383 
3384   Input Parameters:
3385 + mat  - the matrix
3386 - fact - factor
3387 
3388   Options Database Key:
3389 . -mat_use_hash_table <fact> - provide the factor
3390 
3391   Level: advanced
3392 
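  Example Usage:
  A minimal sketch (the factor value 1.6 is an illustrative assumption) that enables the hash table for assembly
  and then sets its sizing factor:

.vb
   PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
   PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve
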
3393 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3394 @*/
3395 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3396 {
3397   PetscFunctionBegin;
3398   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3399   PetscFunctionReturn(PETSC_SUCCESS);
3400 }
3401 
3402 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3403 {
3404   Mat_MPIBAIJ *baij;
3405 
3406   PetscFunctionBegin;
3407   baij          = (Mat_MPIBAIJ *)mat->data;
3408   baij->ht_fact = fact;
3409   PetscFunctionReturn(PETSC_SUCCESS);
3410 }
3411 
3412 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3413 {
3414   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3415   PetscBool    flg;
3416 
3417   PetscFunctionBegin;
3418   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3419   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3420   if (Ad) *Ad = a->A;
3421   if (Ao) *Ao = a->B;
3422   if (colmap) *colmap = a->garray;
3423   PetscFunctionReturn(PETSC_SUCCESS);
3424 }
3425 
3426 /*
3427     Special version for direct calls from Fortran (to eliminate two function call overheads)
3428 */
3429 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3430   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3431 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3432   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3433 #endif
3434 
3435 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3436 /*@C
3437   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3438 
3439   Collective
3440 
3441   Input Parameters:
3442 + matin  - the matrix
3443 . min    - number of input rows
3444 . im     - input rows
3445 . nin    - number of input columns
3446 . in     - input columns
3447 . v      - numerical values input
3448 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3449 
3450   Level: advanced
3451 
3452   Developer Notes:
3453   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3454 
3455 .seealso: `Mat`, `MatSetValuesBlocked()`
3456 @*/
3457 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3458 {
3459   /* convert input arguments to C version */
3460   Mat        mat = *matin;
3461   PetscInt   m = *min, n = *nin;
3462   InsertMode addv = *addvin;
3463 
3464   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3465   const MatScalar *value;
3466   MatScalar       *barray      = baij->barray;
3467   PetscBool        roworiented = baij->roworiented;
3468   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3469   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3470   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3471 
3472   PetscFunctionBegin;
3473   /* tasks normally handled by MatSetValuesBlocked() */
3474   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3475   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3476   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3477   if (mat->assembled) {
3478     mat->was_assembled = PETSC_TRUE;
3479     mat->assembled     = PETSC_FALSE;
3480   }
3481   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3482 
3483   if (!barray) {
3484     PetscCall(PetscMalloc1(bs2, &barray));
3485     baij->barray = barray;
3486   }
3487 
3488   if (roworiented) stepval = (n - 1) * bs;
3489   else stepval = (m - 1) * bs;
3490 
3491   for (i = 0; i < m; i++) {
3492     if (im[i] < 0) continue;
3493     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3494     if (im[i] >= rstart && im[i] < rend) {
3495       row = im[i] - rstart;
3496       for (j = 0; j < n; j++) {
3497         /* If NumCol = 1 then a copy is not required */
3498         if ((roworiented) && (n == 1)) {
3499           barray = (MatScalar *)v + i * bs2;
3500         } else if ((!roworiented) && (m == 1)) {
3501           barray = (MatScalar *)v + j * bs2;
3502         } else { /* Here a copy is required */
3503           if (roworiented) {
3504             value = v + i * (stepval + bs) * bs + j * bs;
3505           } else {
3506             value = v + j * (stepval + bs) * bs + i * bs;
3507           }
3508           for (ii = 0; ii < bs; ii++, value += stepval) {
3509             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3510           }
3511           barray -= bs2;
3512         }
3513 
3514         if (in[j] >= cstart && in[j] < cend) {
3515           col = in[j] - cstart;
3516           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3517         } else if (in[j] < 0) {
3518           continue;
3519         } else {
3520           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3521           if (mat->was_assembled) {
3522             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3523 
3524 #if defined(PETSC_USE_DEBUG)
3525   #if defined(PETSC_USE_CTABLE)
3526             {
3527               PetscInt data;
3528               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3529               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3530             }
3531   #else
3532             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3533   #endif
3534 #endif
3535 #if defined(PETSC_USE_CTABLE)
3536             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3537             col = (col - 1) / bs;
3538 #else
3539             col = (baij->colmap[in[j]] - 1) / bs;
3540 #endif
3541             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3542               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3543               col = in[j];
3544             }
3545           } else col = in[j];
3546           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3547         }
3548       }
3549     } else {
3550       if (!baij->donotstash) {
3551         if (roworiented) {
3552           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3553         } else {
3554           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3555         }
3556       }
3557     }
3558   }
3559 
3560   /* task normally handled by MatSetValuesBlocked() */
3561   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3562   PetscFunctionReturn(PETSC_SUCCESS);
3563 }
3564 
3565 /*@
3566   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3567 
3568   Collective
3569 
3570   Input Parameters:
3571 + comm - MPI communicator
3572 . bs   - the block size; only a block size of 1 is supported
3573 . m    - number of local rows (cannot be `PETSC_DECIDE`)
3574 . n    - number of local columns; this value should be the same as the local size used in creating the
3575          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
3576          calculated if `N` is given). For square matrices `n` is almost always `m`.
3577 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
3578 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
3579 . i    - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3580 . j    - column indices
3581 - a    - matrix values
3582 
3583   Output Parameter:
3584 . mat - the matrix
3585 
3586   Level: intermediate
3587 
3588   Notes:
3589   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3590   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3591   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3592 
3593   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3594   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3595   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3596   with column-major ordering within blocks.
3597 
3598   The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
3599 
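  Example Usage:
  A minimal sketch (the CSR arrays are illustrative assumptions) for a process owning two rows and two columns,
  using the required block size of 1:

.vb
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 1, 1};
   PetscScalar a[] = {1.0, 2.0, 3.0};

   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve
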
3600 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3601           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3602 @*/
3603 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3604 {
3605   PetscFunctionBegin;
3606   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3607   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3608   PetscCall(MatCreate(comm, mat));
3609   PetscCall(MatSetSizes(*mat, m, n, M, N));
3610   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3611   PetscCall(MatSetBlockSize(*mat, bs));
3612   PetscCall(MatSetUp(*mat));
3613   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3614   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3615   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3616   PetscFunctionReturn(PETSC_SUCCESS);
3617 }
3618 
3619 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3620 {
3621   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3622   PetscInt    *indx;
3623   PetscScalar *values;
3624 
3625   PetscFunctionBegin;
3626   PetscCall(MatGetSize(inmat, &m, &N));
3627   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3628     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3629     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3630     PetscInt    *bindx, rmax = a->rmax, j;
3631     PetscMPIInt  rank, size;
3632 
3633     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3634     mbs = m / bs;
3635     Nbs = N / cbs;
3636     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3637     nbs = n / cbs;
3638 
3639     PetscCall(PetscMalloc1(rmax, &bindx));
3640     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3641 
3642     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3643     PetscCallMPI(MPI_Comm_size(comm, &size));
3644     if (rank == size - 1) {
3645       /* Check sum(nbs) = Nbs */
3646       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3647     }
3648 
3649     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3650     for (i = 0; i < mbs; i++) {
3651       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3652       nnz = nnz / bs;
3653       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3654       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3655       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3656     }
3657     PetscCall(PetscFree(bindx));
3658 
3659     PetscCall(MatCreate(comm, outmat));
3660     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3661     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3662     PetscCall(MatSetType(*outmat, MATBAIJ));
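    /* both preallocation variants are invoked; each uses PetscTryMethod(), so only the one matching the runtime type (SeqBAIJ or MPIBAIJ) takes effect */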
3663     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3664     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3665     MatPreallocateEnd(dnz, onz);
3666     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3667   }
3668 
3669   /* numeric phase */
3670   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3671   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3672 
3673   for (i = 0; i < m; i++) {
3674     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3675     Ii = i + rstart;
3676     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3677     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3678   }
3679   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3680   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3681   PetscFunctionReturn(PETSC_SUCCESS);
3682 }
3683