xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 42ce410b93811b6a1b27796b46869d3e6c55267a)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161   do { \
162     brow = row / bs; \
163     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
164     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165     rmax = aimax[brow]; \
166     nrow = ailen[brow]; \
167     bcol = col / bs; \
168     ridx = row % bs; \
169     cidx = col % bs; \
170     low  = 0; \
171     high = nrow; \
172     while (high - low > 3) { \
173       t = (low + high) / 2; \
174       if (rp[t] > bcol) high = t; \
175       else low = t; \
176     } \
177     for (_i = low; _i < high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2 * _i + bs * cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value; \
182         else *bap = value; \
183         goto a_noinsert; \
184       } \
185     } \
186     if (a->nonew == 1) goto a_noinsert; \
187     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189     N = nrow++ - 1; \
190     /* shift up all the later entries in this row */ \
191     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194     rp[_i]                          = bcol; \
195     ap[bs2 * _i + bs * cidx + ridx] = value; \
196   a_noinsert:; \
197     ailen[brow] = nrow; \
198   } while (0)
199 
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201   do { \
202     brow = row / bs; \
203     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
204     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205     rmax = bimax[brow]; \
206     nrow = bilen[brow]; \
207     bcol = col / bs; \
208     ridx = row % bs; \
209     cidx = col % bs; \
210     low  = 0; \
211     high = nrow; \
212     while (high - low > 3) { \
213       t = (low + high) / 2; \
214       if (rp[t] > bcol) high = t; \
215       else low = t; \
216     } \
217     for (_i = low; _i < high; _i++) { \
218       if (rp[_i] > bcol) break; \
219       if (rp[_i] == bcol) { \
220         bap = ap + bs2 * _i + bs * cidx + ridx; \
221         if (addv == ADD_VALUES) *bap += value; \
222         else *bap = value; \
223         goto b_noinsert; \
224       } \
225     } \
226     if (b->nonew == 1) goto b_noinsert; \
227     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229     N = nrow++ - 1; \
230     /* shift up all the later entries in this row */ \
231     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234     rp[_i]                          = bcol; \
235     ap[bs2 * _i + bs * cidx + ridx] = value; \
236   b_noinsert:; \
237     bilen[brow] = nrow; \
238   } while (0)
239 
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243   MatScalar    value;
244   PetscBool    roworiented = baij->roworiented;
245   PetscInt     i, j, row, col;
246   PetscInt     rstart_orig = mat->rmap->rstart;
247   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249 
250   /* Some Variables required in the macro */
251   Mat          A     = baij->A;
252   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
253   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254   MatScalar   *aa = a->a;
255 
256   Mat          B     = baij->B;
257   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
258   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259   MatScalar   *ba = b->a;
260 
261   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
262   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
263   MatScalar *ap, *bap;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < m; i++) {
267     if (im[i] < 0) continue;
268     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269     if (im[i] >= rstart_orig && im[i] < rend_orig) {
270       row = im[i] - rstart_orig;
271       for (j = 0; j < n; j++) {
272         if (in[j] >= cstart_orig && in[j] < cend_orig) {
273           col = in[j] - cstart_orig;
274           if (roworiented) value = v[i * n + j];
275           else value = v[i + j * m];
276           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277         } else if (in[j] < 0) {
278           continue;
279         } else {
280           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281           if (mat->was_assembled) {
282             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285             col = col - 1;
286 #else
287             col = baij->colmap[in[j] / bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290               PetscCall(MatDisAssemble_MPIBAIJ(mat));
291               col = in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B     = baij->B;
294               b     = (Mat_SeqBAIJ *)(B)->data;
295               bimax = b->imax;
296               bi    = b->i;
297               bilen = b->ilen;
298               bj    = b->j;
299               ba    = b->a;
300             } else {
301               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302               col += in[j] % bs;
303             }
304           } else col = in[j];
305           if (roworiented) value = v[i * n + j];
306           else value = v[i + j * m];
307           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309         }
310       }
311     } else {
312       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
313       if (!baij->donotstash) {
314         mat->assembled = PETSC_FALSE;
315         if (roworiented) {
316           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317         } else {
318           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319         }
320       }
321     }
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
410   const PetscScalar *value;
411   MatScalar         *barray      = baij->barray;
412   PetscBool          roworiented = baij->roworiented;
413   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
414   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416 
417   PetscFunctionBegin;
418   if (!barray) {
419     PetscCall(PetscMalloc1(bs2, &barray));
420     baij->barray = barray;
421   }
422 
423   if (roworiented) stepval = (n - 1) * bs;
424   else stepval = (m - 1) * bs;
425 
426   for (i = 0; i < m; i++) {
427     if (im[i] < 0) continue;
428     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429     if (im[i] >= rstart && im[i] < rend) {
430       row = im[i] - rstart;
431       for (j = 0; j < n; j++) {
432         /* If NumCol = 1 then a copy is not required */
433         if ((roworiented) && (n == 1)) {
434           barray = (MatScalar *)v + i * bs2;
435         } else if ((!roworiented) && (m == 1)) {
436           barray = (MatScalar *)v + j * bs2;
437         } else { /* Here a copy is required */
438           if (roworiented) {
439             value = v + (i * (stepval + bs) + j) * bs;
440           } else {
441             value = v + (j * (stepval + bs) + i) * bs;
442           }
443           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445             barray += bs;
446           }
447           barray -= bs2;
448         }
449 
450         if (in[j] >= cstart && in[j] < cend) {
451           col = in[j] - cstart;
452           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453         } else if (in[j] < 0) {
454           continue;
455         } else {
456           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457           if (mat->was_assembled) {
458             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459 
460 #if defined(PETSC_USE_CTABLE)
461             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
462             col = col < 1 ? -1 : (col - 1) / bs;
463 #else
464             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
465 #endif
466             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
467               PetscCall(MatDisAssemble_MPIBAIJ(mat));
468               col = in[j];
469             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
470           } else col = in[j];
471           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
472         }
473       }
474     } else {
475       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
476       if (!baij->donotstash) {
477         if (roworiented) {
478           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
479         } else {
480           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 #define HASH_KEY             0.6180339887
489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
495   PetscBool    roworiented = baij->roworiented;
496   PetscInt     i, j, row, col;
497   PetscInt     rstart_orig = mat->rmap->rstart;
498   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500   PetscReal    tmp;
501   MatScalar  **HD       = baij->hd, value;
502   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503 
504   PetscFunctionBegin;
505   for (i = 0; i < m; i++) {
506     if (PetscDefined(USE_DEBUG)) {
507       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509     }
510     row = im[i];
511     if (row >= rstart_orig && row < rend_orig) {
512       for (j = 0; j < n; j++) {
513         col = in[j];
514         if (roworiented) value = v[i * n + j];
515         else value = v[i + j * m];
516         /* Look up PetscInto the Hash Table */
517         key = (row / bs) * Nbs + (col / bs) + 1;
518         h1  = HASH(size, key, tmp);
519 
520         idx = h1;
521         if (PetscDefined(USE_DEBUG)) {
522           insert_ct++;
523           total_ct++;
524           if (HT[idx] != key) {
525             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526             if (idx == size) {
527               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529             }
530           }
531         } else if (HT[idx] != key) {
532           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533           if (idx == size) {
534             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
653 {
654   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
655   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
656   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
657 
658   PetscFunctionBegin;
659   for (i = 0; i < m; i++) {
660     if (idxm[i] < 0) continue; /* negative row */
661     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
662     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
663       row = idxm[i] - bsrstart;
664       for (j = 0; j < n; j++) {
665         if (idxn[j] < 0) continue; /* negative column */
666         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
667         if (idxn[j] >= bscstart && idxn[j] < bscend) {
668           col = idxn[j] - bscstart;
669           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
670         } else {
671           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
672 #if defined(PETSC_USE_CTABLE)
673           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
674           data--;
675 #else
676           data = baij->colmap[idxn[j] / bs] - 1;
677 #endif
678           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
679           else {
680             col = data + idxn[j] % bs;
681             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
682           }
683         }
684       }
685     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
686   }
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
691 {
692   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
693   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
694   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
695   PetscReal    sum = 0.0;
696   MatScalar   *v;
697 
698   PetscFunctionBegin;
699   if (baij->size == 1) {
700     PetscCall(MatNorm(baij->A, type, nrm));
701   } else {
702     if (type == NORM_FROBENIUS) {
703       v  = amat->a;
704       nz = amat->nz * bs2;
705       for (i = 0; i < nz; i++) {
706         sum += PetscRealPart(PetscConj(*v) * (*v));
707         v++;
708       }
709       v  = bmat->a;
710       nz = bmat->nz * bs2;
711       for (i = 0; i < nz; i++) {
712         sum += PetscRealPart(PetscConj(*v) * (*v));
713         v++;
714       }
715       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
716       *nrm = PetscSqrtReal(*nrm);
717     } else if (type == NORM_1) { /* max column sum */
718       PetscReal *tmp, *tmp2;
719       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
720       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
721       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
722       v  = amat->a;
723       jj = amat->j;
724       for (i = 0; i < amat->nz; i++) {
725         for (j = 0; j < bs; j++) {
726           col = bs * (cstart + *jj) + j; /* column index */
727           for (row = 0; row < bs; row++) {
728             tmp[col] += PetscAbsScalar(*v);
729             v++;
730           }
731         }
732         jj++;
733       }
734       v  = bmat->a;
735       jj = bmat->j;
736       for (i = 0; i < bmat->nz; i++) {
737         for (j = 0; j < bs; j++) {
738           col = bs * garray[*jj] + j;
739           for (row = 0; row < bs; row++) {
740             tmp[col] += PetscAbsScalar(*v);
741             v++;
742           }
743         }
744         jj++;
745       }
746       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
747       *nrm = 0.0;
748       for (j = 0; j < mat->cmap->N; j++) {
749         if (tmp2[j] > *nrm) *nrm = tmp2[j];
750       }
751       PetscCall(PetscFree(tmp));
752       PetscCall(PetscFree(tmp2));
753     } else if (type == NORM_INFINITY) { /* max row sum */
754       PetscReal *sums;
755       PetscCall(PetscMalloc1(bs, &sums));
756       sum = 0.0;
757       for (j = 0; j < amat->mbs; j++) {
758         for (row = 0; row < bs; row++) sums[row] = 0.0;
759         v  = amat->a + bs2 * amat->i[j];
760         nz = amat->i[j + 1] - amat->i[j];
761         for (i = 0; i < nz; i++) {
762           for (col = 0; col < bs; col++) {
763             for (row = 0; row < bs; row++) {
764               sums[row] += PetscAbsScalar(*v);
765               v++;
766             }
767           }
768         }
769         v  = bmat->a + bs2 * bmat->i[j];
770         nz = bmat->i[j + 1] - bmat->i[j];
771         for (i = 0; i < nz; i++) {
772           for (col = 0; col < bs; col++) {
773             for (row = 0; row < bs; row++) {
774               sums[row] += PetscAbsScalar(*v);
775               v++;
776             }
777           }
778         }
779         for (row = 0; row < bs; row++) {
780           if (sums[row] > sum) sum = sums[row];
781         }
782       }
783       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
784       PetscCall(PetscFree(sums));
785     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
786   }
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 /*
791   Creates the hash table, and sets the table
792   This table is created only once.
793   If new entried need to be added to the matrix
794   then the hash table has to be destroyed and
795   recreated.
796 */
797 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
798 {
799   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
800   Mat          A = baij->A, B = baij->B;
801   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
802   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
803   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
804   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
805   PetscInt    *HT, key;
806   MatScalar  **HD;
807   PetscReal    tmp;
808 #if defined(PETSC_USE_INFO)
809   PetscInt ct = 0, max = 0;
810 #endif
811 
812   PetscFunctionBegin;
813   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
814 
815   baij->ht_size = (PetscInt)(factor * nz);
816   ht_size       = baij->ht_size;
817 
818   /* Allocate Memory for Hash Table */
819   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
820   HD = baij->hd;
821   HT = baij->ht;
822 
823   /* Loop Over A */
824   for (i = 0; i < a->mbs; i++) {
825     for (j = ai[i]; j < ai[i + 1]; j++) {
826       row = i + rstart;
827       col = aj[j] + cstart;
828 
829       key = row * Nbs + col + 1;
830       h1  = HASH(ht_size, key, tmp);
831       for (k = 0; k < ht_size; k++) {
832         if (!HT[(h1 + k) % ht_size]) {
833           HT[(h1 + k) % ht_size] = key;
834           HD[(h1 + k) % ht_size] = a->a + j * bs2;
835           break;
836 #if defined(PETSC_USE_INFO)
837         } else {
838           ct++;
839 #endif
840         }
841       }
842 #if defined(PETSC_USE_INFO)
843       if (k > max) max = k;
844 #endif
845     }
846   }
847   /* Loop Over B */
848   for (i = 0; i < b->mbs; i++) {
849     for (j = bi[i]; j < bi[i + 1]; j++) {
850       row = i + rstart;
851       col = garray[bj[j]];
852       key = row * Nbs + col + 1;
853       h1  = HASH(ht_size, key, tmp);
854       for (k = 0; k < ht_size; k++) {
855         if (!HT[(h1 + k) % ht_size]) {
856           HT[(h1 + k) % ht_size] = key;
857           HD[(h1 + k) % ht_size] = b->a + j * bs2;
858           break;
859 #if defined(PETSC_USE_INFO)
860         } else {
861           ct++;
862 #endif
863         }
864       }
865 #if defined(PETSC_USE_INFO)
866       if (k > max) max = k;
867 #endif
868     }
869   }
870 
871   /* Print Summary */
872 #if defined(PETSC_USE_INFO)
873   for (i = 0, j = 0; i < ht_size; i++) {
874     if (HT[i]) j++;
875   }
876   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
877 #endif
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
882 {
883   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
884   PetscInt     nstash, reallocs;
885 
886   PetscFunctionBegin;
887   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
888 
889   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
890   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
891   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
892   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
893   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
899 {
900   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
901   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
902   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
903   PetscInt    *row, *col;
904   PetscBool    r1, r2, r3, other_disassembled;
905   MatScalar   *val;
906   PetscMPIInt  n;
907 
908   PetscFunctionBegin;
909   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
910   if (!baij->donotstash && !mat->nooffprocentries) {
911     while (1) {
912       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
913       if (!flg) break;
914 
915       for (i = 0; i < n;) {
916         /* Now identify the consecutive vals belonging to the same row */
917         for (j = i, rstart = row[j]; j < n; j++) {
918           if (row[j] != rstart) break;
919         }
920         if (j < n) ncols = j - i;
921         else ncols = n - i;
922         /* Now assemble all these values with a single function call */
923         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
924         i = j;
925       }
926     }
927     PetscCall(MatStashScatterEnd_Private(&mat->stash));
928     /* Now process the block-stash. Since the values are stashed column-oriented,
929        set the row-oriented flag to column-oriented, and after MatSetValues()
930        restore the original flags */
931     r1 = baij->roworiented;
932     r2 = a->roworiented;
933     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
934 
935     baij->roworiented                           = PETSC_FALSE;
936     a->roworiented                              = PETSC_FALSE;
937     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
938     while (1) {
939       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
940       if (!flg) break;
941 
942       for (i = 0; i < n;) {
943         /* Now identify the consecutive vals belonging to the same row */
944         for (j = i, rstart = row[j]; j < n; j++) {
945           if (row[j] != rstart) break;
946         }
947         if (j < n) ncols = j - i;
948         else ncols = n - i;
949         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
950         i = j;
951       }
952     }
953     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
954 
955     baij->roworiented                           = r1;
956     a->roworiented                              = r2;
957     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
958   }
959 
960   PetscCall(MatAssemblyBegin(baij->A, mode));
961   PetscCall(MatAssemblyEnd(baij->A, mode));
962 
963   /* determine if any processor has disassembled, if so we must
964      also disassemble ourselves, in order that we may reassemble. */
965   /*
966      if nonzero structure of submatrix B cannot change then we know that
967      no processor disassembled thus we can skip this stuff
968   */
969   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
970     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
971     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
972   }
973 
974   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
975   PetscCall(MatAssemblyBegin(baij->B, mode));
976   PetscCall(MatAssemblyEnd(baij->B, mode));
977 
978 #if defined(PETSC_USE_INFO)
979   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
980     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
981 
982     baij->ht_total_ct  = 0;
983     baij->ht_insert_ct = 0;
984   }
985 #endif
986   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
987     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
988 
989     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
990     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
991   }
992 
993   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
994 
995   baij->rowvalues = NULL;
996 
997   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
998   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
999     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1000     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1001   }
1002   PetscFunctionReturn(PETSC_SUCCESS);
1003 }
1004 
1005 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1006 #include <petscdraw.h>
1007 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1008 {
1009   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1010   PetscMPIInt       rank = baij->rank;
1011   PetscInt          bs   = mat->rmap->bs;
1012   PetscBool         iascii, isdraw;
1013   PetscViewer       sviewer;
1014   PetscViewerFormat format;
1015 
1016   PetscFunctionBegin;
1017   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1019   if (iascii) {
1020     PetscCall(PetscViewerGetFormat(viewer, &format));
1021     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1022       MatInfo info;
1023       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1024       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1025       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1026       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1027                                                    mat->rmap->bs, (double)info.memory));
1028       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1029       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1030       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1031       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1032       PetscCall(PetscViewerFlush(viewer));
1033       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1034       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1035       PetscCall(VecScatterView(baij->Mvctx, viewer));
1036       PetscFunctionReturn(PETSC_SUCCESS);
1037     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1038       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1039       PetscFunctionReturn(PETSC_SUCCESS);
1040     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1041       PetscFunctionReturn(PETSC_SUCCESS);
1042     }
1043   }
1044 
1045   if (isdraw) {
1046     PetscDraw draw;
1047     PetscBool isnull;
1048     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1049     PetscCall(PetscDrawIsNull(draw, &isnull));
1050     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1051   }
1052 
1053   {
1054     /* assemble the entire matrix onto first processor. */
1055     Mat          A;
1056     Mat_SeqBAIJ *Aloc;
1057     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1058     MatScalar   *a;
1059     const char  *matname;
1060 
1061     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1062     /* Perhaps this should be the type of mat? */
1063     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1064     if (rank == 0) {
1065       PetscCall(MatSetSizes(A, M, N, M, N));
1066     } else {
1067       PetscCall(MatSetSizes(A, 0, 0, M, N));
1068     }
1069     PetscCall(MatSetType(A, MATMPIBAIJ));
1070     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1071     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1072 
1073     /* copy over the A part */
1074     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1075     ai   = Aloc->i;
1076     aj   = Aloc->j;
1077     a    = Aloc->a;
1078     PetscCall(PetscMalloc1(bs, &rvals));
1079 
1080     for (i = 0; i < mbs; i++) {
1081       rvals[0] = bs * (baij->rstartbs + i);
1082       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1083       for (j = ai[i]; j < ai[i + 1]; j++) {
1084         col = (baij->cstartbs + aj[j]) * bs;
1085         for (k = 0; k < bs; k++) {
1086           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1087           col++;
1088           a += bs;
1089         }
1090       }
1091     }
1092     /* copy over the B part */
1093     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1094     ai   = Aloc->i;
1095     aj   = Aloc->j;
1096     a    = Aloc->a;
1097     for (i = 0; i < mbs; i++) {
1098       rvals[0] = bs * (baij->rstartbs + i);
1099       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1100       for (j = ai[i]; j < ai[i + 1]; j++) {
1101         col = baij->garray[aj[j]] * bs;
1102         for (k = 0; k < bs; k++) {
1103           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1104           col++;
1105           a += bs;
1106         }
1107       }
1108     }
1109     PetscCall(PetscFree(rvals));
1110     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1111     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1112     /*
1113        Everyone has to call to draw the matrix since the graphics waits are
1114        synchronized across all processors that share the PetscDraw object
1115     */
1116     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1117     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1118     if (rank == 0) {
1119       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1120       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1121     }
1122     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1123     PetscCall(MatDestroy(&A));
1124   }
1125   PetscFunctionReturn(PETSC_SUCCESS);
1126 }
1127 
1128 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1129 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1130 {
1131   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1132   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1133   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1134   const PetscInt *garray = aij->garray;
1135   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1136   PetscInt64      nz, hnz;
1137   PetscInt       *rowlens, *colidxs;
1138   PetscScalar    *matvals;
1139   PetscMPIInt     rank;
1140 
1141   PetscFunctionBegin;
1142   PetscCall(PetscViewerSetUp(viewer));
1143 
1144   M  = mat->rmap->N;
1145   N  = mat->cmap->N;
1146   m  = mat->rmap->n;
1147   rs = mat->rmap->rstart;
1148   cs = mat->cmap->rstart;
1149   bs = mat->rmap->bs;
1150   nz = bs * bs * (A->nz + B->nz);
1151 
1152   /* write matrix header */
1153   header[0] = MAT_FILE_CLASSID;
1154   header[1] = M;
1155   header[2] = N;
1156   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1157   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1158   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1159   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1160 
1161   /* fill in and store row lengths */
1162   PetscCall(PetscMalloc1(m, &rowlens));
1163   for (cnt = 0, i = 0; i < A->mbs; i++)
1164     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1165   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1166   PetscCall(PetscFree(rowlens));
1167 
1168   /* fill in and store column indices */
1169   PetscCall(PetscMalloc1(nz, &colidxs));
1170   for (cnt = 0, i = 0; i < A->mbs; i++) {
1171     for (k = 0; k < bs; k++) {
1172       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1173         if (garray[B->j[jb]] > cs / bs) break;
1174         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1175       }
1176       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1177         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1178       for (; jb < B->i[i + 1]; jb++)
1179         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1180     }
1181   }
1182   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1183   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1184   PetscCall(PetscFree(colidxs));
1185 
1186   /* fill in and store nonzero values */
1187   PetscCall(PetscMalloc1(nz, &matvals));
1188   for (cnt = 0, i = 0; i < A->mbs; i++) {
1189     for (k = 0; k < bs; k++) {
1190       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1191         if (garray[B->j[jb]] > cs / bs) break;
1192         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1193       }
1194       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1195         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1196       for (; jb < B->i[i + 1]; jb++)
1197         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1198     }
1199   }
1200   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1201   PetscCall(PetscFree(matvals));
1202 
1203   /* write block size option to the viewer's .info file */
1204   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1205   PetscFunctionReturn(PETSC_SUCCESS);
1206 }
1207 
1208 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1209 {
1210   PetscBool iascii, isdraw, issocket, isbinary;
1211 
1212   PetscFunctionBegin;
1213   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1214   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1217   if (iascii || isdraw || issocket) {
1218     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1219   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1220   PetscFunctionReturn(PETSC_SUCCESS);
1221 }
1222 
1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1224 {
1225   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1226   PetscInt     nt;
1227 
1228   PetscFunctionBegin;
1229   PetscCall(VecGetLocalSize(xx, &nt));
1230   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1231   PetscCall(VecGetLocalSize(yy, &nt));
1232   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1233   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1234   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1235   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1236   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1237   PetscFunctionReturn(PETSC_SUCCESS);
1238 }
1239 
1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1241 {
1242   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1243 
1244   PetscFunctionBegin;
1245   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1246   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1247   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1249   PetscFunctionReturn(PETSC_SUCCESS);
1250 }
1251 
1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1253 {
1254   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1255 
1256   PetscFunctionBegin;
1257   /* do nondiagonal part */
1258   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1259   /* do local part */
1260   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1261   /* add partial results together */
1262   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1264   PetscFunctionReturn(PETSC_SUCCESS);
1265 }
1266 
1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1268 {
1269   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1270 
1271   PetscFunctionBegin;
1272   /* do nondiagonal part */
1273   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1274   /* do local part */
1275   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1276   /* add partial results together */
1277   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 /*
1283   This only works correctly for square matrices where the subblock A->A is the
1284    diagonal block
1285 */
1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1287 {
1288   PetscFunctionBegin;
1289   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1290   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1291   PetscFunctionReturn(PETSC_SUCCESS);
1292 }
1293 
1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1295 {
1296   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCall(MatScale(a->A, aa));
1300   PetscCall(MatScale(a->B, aa));
1301   PetscFunctionReturn(PETSC_SUCCESS);
1302 }
1303 
1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1305 {
1306   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1307   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1308   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1309   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1310   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1311 
1312   PetscFunctionBegin;
1313   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1314   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1315   mat->getrowactive = PETSC_TRUE;
1316 
1317   if (!mat->rowvalues && (idx || v)) {
1318     /*
1319         allocate enough space to hold information from the longest row.
1320     */
1321     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1322     PetscInt     max = 1, mbs = mat->mbs, tmp;
1323     for (i = 0; i < mbs; i++) {
1324       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1325       if (max < tmp) max = tmp;
1326     }
1327     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1328   }
1329   lrow = row - brstart;
1330 
1331   pvA = &vworkA;
1332   pcA = &cworkA;
1333   pvB = &vworkB;
1334   pcB = &cworkB;
1335   if (!v) {
1336     pvA = NULL;
1337     pvB = NULL;
1338   }
1339   if (!idx) {
1340     pcA = NULL;
1341     if (!v) pcB = NULL;
1342   }
1343   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1344   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1345   nztot = nzA + nzB;
1346 
1347   cmap = mat->garray;
1348   if (v || idx) {
1349     if (nztot) {
1350       /* Sort by increasing column numbers, assuming A and B already sorted */
1351       PetscInt imark = -1;
1352       if (v) {
1353         *v = v_p = mat->rowvalues;
1354         for (i = 0; i < nzB; i++) {
1355           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1356           else break;
1357         }
1358         imark = i;
1359         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1360         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1361       }
1362       if (idx) {
1363         *idx = idx_p = mat->rowindices;
1364         if (imark > -1) {
1365           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1366         } else {
1367           for (i = 0; i < nzB; i++) {
1368             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1369             else break;
1370           }
1371           imark = i;
1372         }
1373         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1374         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1375       }
1376     } else {
1377       if (idx) *idx = NULL;
1378       if (v) *v = NULL;
1379     }
1380   }
1381   *nz = nztot;
1382   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1383   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1388 {
1389   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1390 
1391   PetscFunctionBegin;
1392   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1393   baij->getrowactive = PETSC_FALSE;
1394   PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396 
1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1398 {
1399   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1400 
1401   PetscFunctionBegin;
1402   PetscCall(MatZeroEntries(l->A));
1403   PetscCall(MatZeroEntries(l->B));
1404   PetscFunctionReturn(PETSC_SUCCESS);
1405 }
1406 
1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1408 {
1409   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1410   Mat            A = a->A, B = a->B;
1411   PetscLogDouble isend[5], irecv[5];
1412 
1413   PetscFunctionBegin;
1414   info->block_size = (PetscReal)matin->rmap->bs;
1415 
1416   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1417 
1418   isend[0] = info->nz_used;
1419   isend[1] = info->nz_allocated;
1420   isend[2] = info->nz_unneeded;
1421   isend[3] = info->memory;
1422   isend[4] = info->mallocs;
1423 
1424   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1425 
1426   isend[0] += info->nz_used;
1427   isend[1] += info->nz_allocated;
1428   isend[2] += info->nz_unneeded;
1429   isend[3] += info->memory;
1430   isend[4] += info->mallocs;
1431 
1432   if (flag == MAT_LOCAL) {
1433     info->nz_used      = isend[0];
1434     info->nz_allocated = isend[1];
1435     info->nz_unneeded  = isend[2];
1436     info->memory       = isend[3];
1437     info->mallocs      = isend[4];
1438   } else if (flag == MAT_GLOBAL_MAX) {
1439     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1440 
1441     info->nz_used      = irecv[0];
1442     info->nz_allocated = irecv[1];
1443     info->nz_unneeded  = irecv[2];
1444     info->memory       = irecv[3];
1445     info->mallocs      = irecv[4];
1446   } else if (flag == MAT_GLOBAL_SUM) {
1447     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1448 
1449     info->nz_used      = irecv[0];
1450     info->nz_allocated = irecv[1];
1451     info->nz_unneeded  = irecv[2];
1452     info->memory       = irecv[3];
1453     info->mallocs      = irecv[4];
1454   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1455   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1456   info->fill_ratio_needed = 0;
1457   info->factor_mallocs    = 0;
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460 
1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1462 {
1463   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1464 
1465   PetscFunctionBegin;
1466   switch (op) {
1467   case MAT_NEW_NONZERO_LOCATIONS:
1468   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1469   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1470   case MAT_KEEP_NONZERO_PATTERN:
1471   case MAT_NEW_NONZERO_LOCATION_ERR:
1472     MatCheckPreallocated(A, 1);
1473     PetscCall(MatSetOption(a->A, op, flg));
1474     PetscCall(MatSetOption(a->B, op, flg));
1475     break;
1476   case MAT_ROW_ORIENTED:
1477     MatCheckPreallocated(A, 1);
1478     a->roworiented = flg;
1479 
1480     PetscCall(MatSetOption(a->A, op, flg));
1481     PetscCall(MatSetOption(a->B, op, flg));
1482     break;
1483   case MAT_FORCE_DIAGONAL_ENTRIES:
1484   case MAT_SORTED_FULL:
1485     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1486     break;
1487   case MAT_IGNORE_OFF_PROC_ENTRIES:
1488     a->donotstash = flg;
1489     break;
1490   case MAT_USE_HASH_TABLE:
1491     a->ht_flag = flg;
1492     a->ht_fact = 1.39;
1493     break;
1494   case MAT_SYMMETRIC:
1495   case MAT_STRUCTURALLY_SYMMETRIC:
1496   case MAT_HERMITIAN:
1497   case MAT_SUBMAT_SINGLEIS:
1498   case MAT_SYMMETRY_ETERNAL:
1499   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1500   case MAT_SPD_ETERNAL:
1501     /* if the diagonal matrix is square it inherits some of the properties above */
1502     break;
1503   default:
1504     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1505   }
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
1508 
1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1512   Mat_SeqBAIJ *Aloc;
1513   Mat          B;
1514   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1515   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1516   MatScalar   *a;
1517 
1518   PetscFunctionBegin;
1519   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1520   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1521     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1522     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1523     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1524     /* Do not know preallocation information, but must set block size */
1525     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1526   } else {
1527     B = *matout;
1528   }
1529 
1530   /* copy over the A part */
1531   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1532   ai   = Aloc->i;
1533   aj   = Aloc->j;
1534   a    = Aloc->a;
1535   PetscCall(PetscMalloc1(bs, &rvals));
1536 
1537   for (i = 0; i < mbs; i++) {
1538     rvals[0] = bs * (baij->rstartbs + i);
1539     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1540     for (j = ai[i]; j < ai[i + 1]; j++) {
1541       col = (baij->cstartbs + aj[j]) * bs;
1542       for (k = 0; k < bs; k++) {
1543         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1544 
1545         col++;
1546         a += bs;
1547       }
1548     }
1549   }
1550   /* copy over the B part */
1551   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1552   ai   = Aloc->i;
1553   aj   = Aloc->j;
1554   a    = Aloc->a;
1555   for (i = 0; i < mbs; i++) {
1556     rvals[0] = bs * (baij->rstartbs + i);
1557     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1558     for (j = ai[i]; j < ai[i + 1]; j++) {
1559       col = baij->garray[aj[j]] * bs;
1560       for (k = 0; k < bs; k++) {
1561         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1562         col++;
1563         a += bs;
1564       }
1565     }
1566   }
1567   PetscCall(PetscFree(rvals));
1568   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1569   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1570 
1571   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1572   else PetscCall(MatHeaderMerge(A, &B));
1573   PetscFunctionReturn(PETSC_SUCCESS);
1574 }
1575 
1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1577 {
1578   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1579   Mat          a = baij->A, b = baij->B;
1580   PetscInt     s1, s2, s3;
1581 
1582   PetscFunctionBegin;
1583   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1584   if (rr) {
1585     PetscCall(VecGetLocalSize(rr, &s1));
1586     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1587     /* Overlap communication with computation. */
1588     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1589   }
1590   if (ll) {
1591     PetscCall(VecGetLocalSize(ll, &s1));
1592     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1593     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1594   }
1595   /* scale  the diagonal block */
1596   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1597 
1598   if (rr) {
1599     /* Do a scatter end and then right scale the off-diagonal block */
1600     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1601     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1602   }
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1607 {
1608   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1609   PetscInt    *lrows;
1610   PetscInt     r, len;
1611   PetscBool    cong;
1612 
1613   PetscFunctionBegin;
1614   /* get locally owned rows */
1615   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1616   /* fix right-hand side if needed */
1617   if (x && b) {
1618     const PetscScalar *xx;
1619     PetscScalar       *bb;
1620 
1621     PetscCall(VecGetArrayRead(x, &xx));
1622     PetscCall(VecGetArray(b, &bb));
1623     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1624     PetscCall(VecRestoreArrayRead(x, &xx));
1625     PetscCall(VecRestoreArray(b, &bb));
1626   }
1627 
1628   /* actually zap the local rows */
1629   /*
1630         Zero the required rows. If the "diagonal block" of the matrix
1631      is square and the user wishes to set the diagonal we use separate
1632      code so that MatSetValues() is not called for each diagonal allocating
1633      new memory, thus calling lots of mallocs and slowing things down.
1634 
1635   */
1636   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1637   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1638   PetscCall(MatHasCongruentLayouts(A, &cong));
1639   if ((diag != 0.0) && cong) {
1640     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1641   } else if (diag != 0.0) {
1642     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1643     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1644     for (r = 0; r < len; ++r) {
1645       const PetscInt row = lrows[r] + A->rmap->rstart;
1646       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1647     }
1648     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1649     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1650   } else {
1651     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1652   }
1653   PetscCall(PetscFree(lrows));
1654 
1655   /* only change matrix nonzero state if pattern was allowed to be changed */
1656   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1657     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1658     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1659   }
1660   PetscFunctionReturn(PETSC_SUCCESS);
1661 }
1662 
1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1664 {
1665   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1666   PetscMPIInt        n = A->rmap->n, p = 0;
1667   PetscInt           i, j, k, r, len = 0, row, col, count;
1668   PetscInt          *lrows, *owners = A->rmap->range;
1669   PetscSFNode       *rrows;
1670   PetscSF            sf;
1671   const PetscScalar *xx;
1672   PetscScalar       *bb, *mask;
1673   Vec                xmask, lmask;
1674   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1675   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1676   PetscScalar       *aa;
1677 
1678   PetscFunctionBegin;
1679   /* Create SF where leaves are input rows and roots are owned rows */
1680   PetscCall(PetscMalloc1(n, &lrows));
1681   for (r = 0; r < n; ++r) lrows[r] = -1;
1682   PetscCall(PetscMalloc1(N, &rrows));
1683   for (r = 0; r < N; ++r) {
1684     const PetscInt idx = rows[r];
1685     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1686     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1687       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1688     }
1689     rrows[r].rank  = p;
1690     rrows[r].index = rows[r] - owners[p];
1691   }
1692   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1693   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1694   /* Collect flags for rows to be zeroed */
1695   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1696   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1697   PetscCall(PetscSFDestroy(&sf));
1698   /* Compress and put in row numbers */
1699   for (r = 0; r < n; ++r)
1700     if (lrows[r] >= 0) lrows[len++] = r;
1701   /* zero diagonal part of matrix */
1702   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1703   /* handle off-diagonal part of matrix */
1704   PetscCall(MatCreateVecs(A, &xmask, NULL));
1705   PetscCall(VecDuplicate(l->lvec, &lmask));
1706   PetscCall(VecGetArray(xmask, &bb));
1707   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1708   PetscCall(VecRestoreArray(xmask, &bb));
1709   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1710   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1711   PetscCall(VecDestroy(&xmask));
1712   if (x) {
1713     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1714     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1715     PetscCall(VecGetArrayRead(l->lvec, &xx));
1716     PetscCall(VecGetArray(b, &bb));
1717   }
1718   PetscCall(VecGetArray(lmask, &mask));
1719   /* remove zeroed rows of off-diagonal matrix */
1720   for (i = 0; i < len; ++i) {
1721     row   = lrows[i];
1722     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1723     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1724     for (k = 0; k < count; ++k) {
1725       aa[0] = 0.0;
1726       aa += bs;
1727     }
1728   }
1729   /* loop over all elements of off process part of matrix zeroing removed columns*/
1730   for (i = 0; i < l->B->rmap->N; ++i) {
1731     row = i / bs;
1732     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1733       for (k = 0; k < bs; ++k) {
1734         col = bs * baij->j[j] + k;
1735         if (PetscAbsScalar(mask[col])) {
1736           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1737           if (x) bb[i] -= aa[0] * xx[col];
1738           aa[0] = 0.0;
1739         }
1740       }
1741     }
1742   }
1743   if (x) {
1744     PetscCall(VecRestoreArray(b, &bb));
1745     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1746   }
1747   PetscCall(VecRestoreArray(lmask, &mask));
1748   PetscCall(VecDestroy(&lmask));
1749   PetscCall(PetscFree(lrows));
1750 
1751   /* only change matrix nonzero state if pattern was allowed to be changed */
1752   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1753     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1754     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1755   }
1756   PetscFunctionReturn(PETSC_SUCCESS);
1757 }
1758 
1759 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1762 
1763   PetscFunctionBegin;
1764   PetscCall(MatSetUnfactored(a->A));
1765   PetscFunctionReturn(PETSC_SUCCESS);
1766 }
1767 
1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1769 
1770 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1773   Mat          a, b, c, d;
1774   PetscBool    flg;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A;
1778   b = matA->B;
1779   c = matB->A;
1780   d = matB->B;
1781 
1782   PetscCall(MatEqual(a, c, &flg));
1783   if (flg) PetscCall(MatEqual(b, d, &flg));
1784   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1785   PetscFunctionReturn(PETSC_SUCCESS);
1786 }
1787 
1788 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1789 {
1790   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1791   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1792 
1793   PetscFunctionBegin;
1794   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1795   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1796     PetscCall(MatCopy_Basic(A, B, str));
1797   } else {
1798     PetscCall(MatCopy(a->A, b->A, str));
1799     PetscCall(MatCopy(a->B, b->B, str));
1800   }
1801   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1802   PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804 
1805 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1806 {
1807   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1808   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1809   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1810 
1811   PetscFunctionBegin;
1812   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1813   PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815 
1816 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1817 {
1818   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1819   PetscBLASInt bnz, one                         = 1;
1820   Mat_SeqBAIJ *x, *y;
1821   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1822 
1823   PetscFunctionBegin;
1824   if (str == SAME_NONZERO_PATTERN) {
1825     PetscScalar alpha = a;
1826     x                 = (Mat_SeqBAIJ *)xx->A->data;
1827     y                 = (Mat_SeqBAIJ *)yy->A->data;
1828     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1829     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1830     x = (Mat_SeqBAIJ *)xx->B->data;
1831     y = (Mat_SeqBAIJ *)yy->B->data;
1832     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1833     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1834     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1835   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1836     PetscCall(MatAXPY_Basic(Y, a, X, str));
1837   } else {
1838     Mat       B;
1839     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1840     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1841     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1842     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1843     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1844     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1845     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1846     PetscCall(MatSetType(B, MATMPIBAIJ));
1847     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1848     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1849     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1850     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1851     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1852     PetscCall(MatHeaderMerge(Y, &B));
1853     PetscCall(PetscFree(nnz_d));
1854     PetscCall(PetscFree(nnz_o));
1855   }
1856   PetscFunctionReturn(PETSC_SUCCESS);
1857 }
1858 
1859 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1860 {
1861   PetscFunctionBegin;
1862   if (PetscDefined(USE_COMPLEX)) {
1863     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1864 
1865     PetscCall(MatConjugate_SeqBAIJ(a->A));
1866     PetscCall(MatConjugate_SeqBAIJ(a->B));
1867   }
1868   PetscFunctionReturn(PETSC_SUCCESS);
1869 }
1870 
1871 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1872 {
1873   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1874 
1875   PetscFunctionBegin;
1876   PetscCall(MatRealPart(a->A));
1877   PetscCall(MatRealPart(a->B));
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
1881 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1882 {
1883   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1884 
1885   PetscFunctionBegin;
1886   PetscCall(MatImaginaryPart(a->A));
1887   PetscCall(MatImaginaryPart(a->B));
1888   PetscFunctionReturn(PETSC_SUCCESS);
1889 }
1890 
1891 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1892 {
1893   IS       iscol_local;
1894   PetscInt csize;
1895 
1896   PetscFunctionBegin;
1897   PetscCall(ISGetLocalSize(iscol, &csize));
1898   if (call == MAT_REUSE_MATRIX) {
1899     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1900     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1901   } else {
1902     PetscCall(ISAllGather(iscol, &iscol_local));
1903   }
1904   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1905   if (call == MAT_INITIAL_MATRIX) {
1906     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1907     PetscCall(ISDestroy(&iscol_local));
1908   }
1909   PetscFunctionReturn(PETSC_SUCCESS);
1910 }
1911 
1912 /*
1913   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1914   in local and then by concatenating the local matrices the end result.
1915   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1916   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1917 */
1918 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1919 {
1920   PetscMPIInt  rank, size;
1921   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1922   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1923   Mat          M, Mreuse;
1924   MatScalar   *vwork, *aa;
1925   MPI_Comm     comm;
1926   IS           isrow_new, iscol_new;
1927   Mat_SeqBAIJ *aij;
1928 
1929   PetscFunctionBegin;
1930   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1931   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1932   PetscCallMPI(MPI_Comm_size(comm, &size));
1933   /* The compression and expansion should be avoided. Doesn't point
1934      out errors, might change the indices, hence buggey */
1935   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1936   if (isrow == iscol) {
1937     iscol_new = isrow_new;
1938     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1939   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1940 
1941   if (call == MAT_REUSE_MATRIX) {
1942     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1943     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1944     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1945   } else {
1946     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1947   }
1948   PetscCall(ISDestroy(&isrow_new));
1949   PetscCall(ISDestroy(&iscol_new));
1950   /*
1951       m - number of local rows
1952       n - number of columns (same on all processors)
1953       rstart - first row in new global matrix generated
1954   */
1955   PetscCall(MatGetBlockSize(mat, &bs));
1956   PetscCall(MatGetSize(Mreuse, &m, &n));
1957   m = m / bs;
1958   n = n / bs;
1959 
1960   if (call == MAT_INITIAL_MATRIX) {
1961     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1962     ii  = aij->i;
1963     jj  = aij->j;
1964 
1965     /*
1966         Determine the number of non-zeros in the diagonal and off-diagonal
1967         portions of the matrix in order to do correct preallocation
1968     */
1969 
1970     /* first get start and end of "diagonal" columns */
1971     if (csize == PETSC_DECIDE) {
1972       PetscCall(ISGetSize(isrow, &mglobal));
1973       if (mglobal == n * bs) { /* square matrix */
1974         nlocal = m;
1975       } else {
1976         nlocal = n / size + ((n % size) > rank);
1977       }
1978     } else {
1979       nlocal = csize / bs;
1980     }
1981     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1982     rstart = rend - nlocal;
1983     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1984 
1985     /* next, compute all the lengths */
1986     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1987     for (i = 0; i < m; i++) {
1988       jend = ii[i + 1] - ii[i];
1989       olen = 0;
1990       dlen = 0;
1991       for (j = 0; j < jend; j++) {
1992         if (*jj < rstart || *jj >= rend) olen++;
1993         else dlen++;
1994         jj++;
1995       }
1996       olens[i] = olen;
1997       dlens[i] = dlen;
1998     }
1999     PetscCall(MatCreate(comm, &M));
2000     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2001     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2002     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2003     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2004     PetscCall(PetscFree2(dlens, olens));
2005   } else {
2006     PetscInt ml, nl;
2007 
2008     M = *newmat;
2009     PetscCall(MatGetLocalSize(M, &ml, &nl));
2010     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2011     PetscCall(MatZeroEntries(M));
2012     /*
2013          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2014        rather than the slower MatSetValues().
2015     */
2016     M->was_assembled = PETSC_TRUE;
2017     M->assembled     = PETSC_FALSE;
2018   }
2019   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2020   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2021   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2022   ii  = aij->i;
2023   jj  = aij->j;
2024   aa  = aij->a;
2025   for (i = 0; i < m; i++) {
2026     row   = rstart / bs + i;
2027     nz    = ii[i + 1] - ii[i];
2028     cwork = jj;
2029     jj    = PetscSafePointerPlusOffset(jj, nz);
2030     vwork = aa;
2031     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2032     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2033   }
2034 
2035   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2036   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2037   *newmat = M;
2038 
2039   /* save submatrix used in processor for next request */
2040   if (call == MAT_INITIAL_MATRIX) {
2041     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2042     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2043   }
2044   PetscFunctionReturn(PETSC_SUCCESS);
2045 }
2046 
2047 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2048 {
2049   MPI_Comm        comm, pcomm;
2050   PetscInt        clocal_size, nrows;
2051   const PetscInt *rows;
2052   PetscMPIInt     size;
2053   IS              crowp, lcolp;
2054 
2055   PetscFunctionBegin;
2056   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2057   /* make a collective version of 'rowp' */
2058   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2059   if (pcomm == comm) {
2060     crowp = rowp;
2061   } else {
2062     PetscCall(ISGetSize(rowp, &nrows));
2063     PetscCall(ISGetIndices(rowp, &rows));
2064     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2065     PetscCall(ISRestoreIndices(rowp, &rows));
2066   }
2067   PetscCall(ISSetPermutation(crowp));
2068   /* make a local version of 'colp' */
2069   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2070   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2071   if (size == 1) {
2072     lcolp = colp;
2073   } else {
2074     PetscCall(ISAllGather(colp, &lcolp));
2075   }
2076   PetscCall(ISSetPermutation(lcolp));
2077   /* now we just get the submatrix */
2078   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2079   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2080   /* clean up */
2081   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2082   if (size > 1) PetscCall(ISDestroy(&lcolp));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
2086 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2087 {
2088   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2089   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2090 
2091   PetscFunctionBegin;
2092   if (nghosts) *nghosts = B->nbs;
2093   if (ghosts) *ghosts = baij->garray;
2094   PetscFunctionReturn(PETSC_SUCCESS);
2095 }
2096 
2097 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2098 {
2099   Mat          B;
2100   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2101   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2102   Mat_SeqAIJ  *b;
2103   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2104   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2105   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2106 
2107   PetscFunctionBegin;
2108   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2109   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2110 
2111   /*   Tell every processor the number of nonzeros per row  */
2112   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2113   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2114   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2115   displs = recvcounts + size;
2116   for (i = 0; i < size; i++) {
2117     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2118     displs[i]     = A->rmap->range[i] / bs;
2119   }
2120   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2121   /* Create the sequential matrix of the same type as the local block diagonal  */
2122   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2123   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2124   PetscCall(MatSetType(B, MATSEQAIJ));
2125   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2126   b = (Mat_SeqAIJ *)B->data;
2127 
2128   /*     Copy my part of matrix column indices over  */
2129   sendcount  = ad->nz + bd->nz;
2130   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2131   a_jsendbuf = ad->j;
2132   b_jsendbuf = bd->j;
2133   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2134   cnt        = 0;
2135   for (i = 0; i < n; i++) {
2136     /* put in lower diagonal portion */
2137     m = bd->i[i + 1] - bd->i[i];
2138     while (m > 0) {
2139       /* is it above diagonal (in bd (compressed) numbering) */
2140       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2141       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2142       m--;
2143     }
2144 
2145     /* put in diagonal portion */
2146     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2147 
2148     /* put in upper diagonal portion */
2149     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2150   }
2151   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2152 
2153   /*  Gather all column indices to all processors  */
2154   for (i = 0; i < size; i++) {
2155     recvcounts[i] = 0;
2156     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2157   }
2158   displs[0] = 0;
2159   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2160   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2161   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2162   /* set the b->ilen (length of each row) values */
2163   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2164   /* set the b->i indices */
2165   b->i[0] = 0;
2166   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2167   PetscCall(PetscFree(lens));
2168   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2169   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2170   PetscCall(PetscFree(recvcounts));
2171 
2172   PetscCall(MatPropagateSymmetryOptions(A, B));
2173   *newmat = B;
2174   PetscFunctionReturn(PETSC_SUCCESS);
2175 }
2176 
2177 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2178 {
2179   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2180   Vec          bb1 = NULL;
2181 
2182   PetscFunctionBegin;
2183   if (flag == SOR_APPLY_UPPER) {
2184     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2185     PetscFunctionReturn(PETSC_SUCCESS);
2186   }
2187 
2188   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2189 
2190   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2191     if (flag & SOR_ZERO_INITIAL_GUESS) {
2192       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2193       its--;
2194     }
2195 
2196     while (its--) {
2197       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2198       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2199 
2200       /* update rhs: bb1 = bb - B*x */
2201       PetscCall(VecScale(mat->lvec, -1.0));
2202       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2203 
2204       /* local sweep */
2205       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2206     }
2207   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2208     if (flag & SOR_ZERO_INITIAL_GUESS) {
2209       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2210       its--;
2211     }
2212     while (its--) {
2213       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2214       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2215 
2216       /* update rhs: bb1 = bb - B*x */
2217       PetscCall(VecScale(mat->lvec, -1.0));
2218       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2219 
2220       /* local sweep */
2221       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2222     }
2223   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2224     if (flag & SOR_ZERO_INITIAL_GUESS) {
2225       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2226       its--;
2227     }
2228     while (its--) {
2229       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2230       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2231 
2232       /* update rhs: bb1 = bb - B*x */
2233       PetscCall(VecScale(mat->lvec, -1.0));
2234       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2235 
2236       /* local sweep */
2237       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2238     }
2239   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2240 
2241   PetscCall(VecDestroy(&bb1));
2242   PetscFunctionReturn(PETSC_SUCCESS);
2243 }
2244 
2245 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2246 {
2247   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2248   PetscInt     m, N, i, *garray = aij->garray;
2249   PetscInt     ib, jb, bs = A->rmap->bs;
2250   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2251   MatScalar   *a_val = a_aij->a;
2252   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2253   MatScalar   *b_val = b_aij->a;
2254   PetscReal   *work;
2255 
2256   PetscFunctionBegin;
2257   PetscCall(MatGetSize(A, &m, &N));
2258   PetscCall(PetscCalloc1(N, &work));
2259   if (type == NORM_2) {
2260     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2261       for (jb = 0; jb < bs; jb++) {
2262         for (ib = 0; ib < bs; ib++) {
2263           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2264           a_val++;
2265         }
2266       }
2267     }
2268     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2269       for (jb = 0; jb < bs; jb++) {
2270         for (ib = 0; ib < bs; ib++) {
2271           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2272           b_val++;
2273         }
2274       }
2275     }
2276   } else if (type == NORM_1) {
2277     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2278       for (jb = 0; jb < bs; jb++) {
2279         for (ib = 0; ib < bs; ib++) {
2280           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2281           a_val++;
2282         }
2283       }
2284     }
2285     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2286       for (jb = 0; jb < bs; jb++) {
2287         for (ib = 0; ib < bs; ib++) {
2288           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2289           b_val++;
2290         }
2291       }
2292     }
2293   } else if (type == NORM_INFINITY) {
2294     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2295       for (jb = 0; jb < bs; jb++) {
2296         for (ib = 0; ib < bs; ib++) {
2297           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2298           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2299           a_val++;
2300         }
2301       }
2302     }
2303     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2304       for (jb = 0; jb < bs; jb++) {
2305         for (ib = 0; ib < bs; ib++) {
2306           int col   = garray[b_aij->j[i]] * bs + jb;
2307           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2308           b_val++;
2309         }
2310       }
2311     }
2312   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2313     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2314       for (jb = 0; jb < bs; jb++) {
2315         for (ib = 0; ib < bs; ib++) {
2316           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2317           a_val++;
2318         }
2319       }
2320     }
2321     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2322       for (jb = 0; jb < bs; jb++) {
2323         for (ib = 0; ib < bs; ib++) {
2324           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2325           b_val++;
2326         }
2327       }
2328     }
2329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2330     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2331       for (jb = 0; jb < bs; jb++) {
2332         for (ib = 0; ib < bs; ib++) {
2333           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2334           a_val++;
2335         }
2336       }
2337     }
2338     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2339       for (jb = 0; jb < bs; jb++) {
2340         for (ib = 0; ib < bs; ib++) {
2341           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2342           b_val++;
2343         }
2344       }
2345     }
2346   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2347   if (type == NORM_INFINITY) {
2348     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2349   } else {
2350     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2351   }
2352   PetscCall(PetscFree(work));
2353   if (type == NORM_2) {
2354     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2355   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2356     for (i = 0; i < N; i++) reductions[i] /= m;
2357   }
2358   PetscFunctionReturn(PETSC_SUCCESS);
2359 }
2360 
2361 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2362 {
2363   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2364 
2365   PetscFunctionBegin;
2366   PetscCall(MatInvertBlockDiagonal(a->A, values));
2367   A->factorerrortype             = a->A->factorerrortype;
2368   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2369   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2370   PetscFunctionReturn(PETSC_SUCCESS);
2371 }
2372 
2373 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2374 {
2375   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2376   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2377 
2378   PetscFunctionBegin;
2379   if (!Y->preallocated) {
2380     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2381   } else if (!aij->nz) {
2382     PetscInt nonew = aij->nonew;
2383     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2384     aij->nonew = nonew;
2385   }
2386   PetscCall(MatShift_Basic(Y, a));
2387   PetscFunctionReturn(PETSC_SUCCESS);
2388 }
2389 
2390 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2391 {
2392   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2393 
2394   PetscFunctionBegin;
2395   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2396   PetscCall(MatMissingDiagonal(a->A, missing, d));
2397   if (d) {
2398     PetscInt rstart;
2399     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2400     *d += rstart / A->rmap->bs;
2401   }
2402   PetscFunctionReturn(PETSC_SUCCESS);
2403 }
2404 
2405 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2406 {
2407   PetscFunctionBegin;
2408   *a = ((Mat_MPIBAIJ *)A->data)->A;
2409   PetscFunctionReturn(PETSC_SUCCESS);
2410 }
2411 
2412 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2413 {
2414   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2415 
2416   PetscFunctionBegin;
2417   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2418   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2419   PetscFunctionReturn(PETSC_SUCCESS);
2420 }
2421 
2422 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2423                                        MatGetRow_MPIBAIJ,
2424                                        MatRestoreRow_MPIBAIJ,
2425                                        MatMult_MPIBAIJ,
2426                                        /* 4*/ MatMultAdd_MPIBAIJ,
2427                                        MatMultTranspose_MPIBAIJ,
2428                                        MatMultTransposeAdd_MPIBAIJ,
2429                                        NULL,
2430                                        NULL,
2431                                        NULL,
2432                                        /*10*/ NULL,
2433                                        NULL,
2434                                        NULL,
2435                                        MatSOR_MPIBAIJ,
2436                                        MatTranspose_MPIBAIJ,
2437                                        /*15*/ MatGetInfo_MPIBAIJ,
2438                                        MatEqual_MPIBAIJ,
2439                                        MatGetDiagonal_MPIBAIJ,
2440                                        MatDiagonalScale_MPIBAIJ,
2441                                        MatNorm_MPIBAIJ,
2442                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2443                                        MatAssemblyEnd_MPIBAIJ,
2444                                        MatSetOption_MPIBAIJ,
2445                                        MatZeroEntries_MPIBAIJ,
2446                                        /*24*/ MatZeroRows_MPIBAIJ,
2447                                        NULL,
2448                                        NULL,
2449                                        NULL,
2450                                        NULL,
2451                                        /*29*/ MatSetUp_MPI_Hash,
2452                                        NULL,
2453                                        NULL,
2454                                        MatGetDiagonalBlock_MPIBAIJ,
2455                                        NULL,
2456                                        /*34*/ MatDuplicate_MPIBAIJ,
2457                                        NULL,
2458                                        NULL,
2459                                        NULL,
2460                                        NULL,
2461                                        /*39*/ MatAXPY_MPIBAIJ,
2462                                        MatCreateSubMatrices_MPIBAIJ,
2463                                        MatIncreaseOverlap_MPIBAIJ,
2464                                        MatGetValues_MPIBAIJ,
2465                                        MatCopy_MPIBAIJ,
2466                                        /*44*/ NULL,
2467                                        MatScale_MPIBAIJ,
2468                                        MatShift_MPIBAIJ,
2469                                        NULL,
2470                                        MatZeroRowsColumns_MPIBAIJ,
2471                                        /*49*/ NULL,
2472                                        NULL,
2473                                        NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2477                                        NULL,
2478                                        MatSetUnfactored_MPIBAIJ,
2479                                        MatPermute_MPIBAIJ,
2480                                        MatSetValuesBlocked_MPIBAIJ,
2481                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2482                                        MatDestroy_MPIBAIJ,
2483                                        MatView_MPIBAIJ,
2484                                        NULL,
2485                                        NULL,
2486                                        /*64*/ NULL,
2487                                        NULL,
2488                                        NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2492                                        NULL,
2493                                        NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        /*74*/ NULL,
2497                                        MatFDColoringApply_BAIJ,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        /*79*/ NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        NULL,
2505                                        MatLoad_MPIBAIJ,
2506                                        /*84*/ NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        /*89*/ NULL,
2512                                        NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        /*94*/ NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        /*99*/ NULL,
2522                                        NULL,
2523                                        NULL,
2524                                        MatConjugate_MPIBAIJ,
2525                                        NULL,
2526                                        /*104*/ NULL,
2527                                        MatRealPart_MPIBAIJ,
2528                                        MatImaginaryPart_MPIBAIJ,
2529                                        NULL,
2530                                        NULL,
2531                                        /*109*/ NULL,
2532                                        NULL,
2533                                        NULL,
2534                                        NULL,
2535                                        MatMissingDiagonal_MPIBAIJ,
2536                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2537                                        NULL,
2538                                        MatGetGhosts_MPIBAIJ,
2539                                        NULL,
2540                                        NULL,
2541                                        /*119*/ NULL,
2542                                        NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        MatGetMultiProcBlock_MPIBAIJ,
2546                                        /*124*/ NULL,
2547                                        MatGetColumnReductions_MPIBAIJ,
2548                                        MatInvertBlockDiagonal_MPIBAIJ,
2549                                        NULL,
2550                                        NULL,
2551                                        /*129*/ NULL,
2552                                        NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        /*134*/ NULL,
2557                                        NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        /*139*/ MatSetBlockSizes_Default,
2562                                        NULL,
2563                                        NULL,
2564                                        MatFDColoringSetUp_MPIXAIJ,
2565                                        NULL,
2566                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2567                                        NULL,
2568                                        NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        /*150*/ NULL,
2573                                        MatEliminateZeros_MPIBAIJ,
2574                                        MatGetRowSumAbs_MPIBAIJ,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
2582 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2583 {
2584   PetscInt        m, rstart, cstart, cend;
2585   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2586   const PetscInt *JJ          = NULL;
2587   PetscScalar    *values      = NULL;
2588   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2589   PetscBool       nooffprocentries;
2590 
2591   PetscFunctionBegin;
2592   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2593   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2594   PetscCall(PetscLayoutSetUp(B->rmap));
2595   PetscCall(PetscLayoutSetUp(B->cmap));
2596   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2597   m      = B->rmap->n / bs;
2598   rstart = B->rmap->rstart / bs;
2599   cstart = B->cmap->rstart / bs;
2600   cend   = B->cmap->rend / bs;
2601 
2602   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2603   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2604   for (i = 0; i < m; i++) {
2605     nz = ii[i + 1] - ii[i];
2606     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2607     nz_max = PetscMax(nz_max, nz);
2608     dlen   = 0;
2609     olen   = 0;
2610     JJ     = jj + ii[i];
2611     for (j = 0; j < nz; j++) {
2612       if (*JJ < cstart || *JJ >= cend) olen++;
2613       else dlen++;
2614       JJ++;
2615     }
2616     d_nnz[i] = dlen;
2617     o_nnz[i] = olen;
2618   }
2619   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2620   PetscCall(PetscFree2(d_nnz, o_nnz));
2621 
2622   values = (PetscScalar *)V;
2623   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2624   for (i = 0; i < m; i++) {
2625     PetscInt        row   = i + rstart;
2626     PetscInt        ncols = ii[i + 1] - ii[i];
2627     const PetscInt *icols = jj + ii[i];
2628     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2629       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2630       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2631     } else { /* block ordering does not match so we can only insert one block at a time. */
2632       PetscInt j;
2633       for (j = 0; j < ncols; j++) {
2634         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2635         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2636       }
2637     }
2638   }
2639 
2640   if (!V) PetscCall(PetscFree(values));
2641   nooffprocentries    = B->nooffprocentries;
2642   B->nooffprocentries = PETSC_TRUE;
2643   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2644   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2645   B->nooffprocentries = nooffprocentries;
2646 
2647   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2648   PetscFunctionReturn(PETSC_SUCCESS);
2649 }
2650 
2651 /*@C
2652   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654   Collective
2655 
2656   Input Parameters:
2657 + B  - the matrix
2658 . bs - the block size
2659 . i  - the indices into `j` for the start of each local row (starts with zero)
2660 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2661 - v  - optional values in the matrix, use `NULL` if not provided
2662 
2663   Level: advanced
2664 
2665   Notes:
2666   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2667   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2668   called this routine.
2669 
2670   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2671   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2672   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2673   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2674   block column and the second index is over columns within a block.
2675 
2676   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2677 
2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2679 @*/
2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2681 {
2682   PetscFunctionBegin;
2683   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2684   PetscValidType(B, 1);
2685   PetscValidLogicalCollectiveInt(B, bs, 2);
2686   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2691 {
2692   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2693   PetscInt     i;
2694   PetscMPIInt  size;
2695 
2696   PetscFunctionBegin;
2697   if (B->hash_active) {
2698     B->ops[0]      = b->cops;
2699     B->hash_active = PETSC_FALSE;
2700   }
2701   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2702   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2703   PetscCall(PetscLayoutSetUp(B->rmap));
2704   PetscCall(PetscLayoutSetUp(B->cmap));
2705   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2706 
2707   if (d_nnz) {
2708     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2709   }
2710   if (o_nnz) {
2711     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2712   }
2713 
2714   b->bs2 = bs * bs;
2715   b->mbs = B->rmap->n / bs;
2716   b->nbs = B->cmap->n / bs;
2717   b->Mbs = B->rmap->N / bs;
2718   b->Nbs = B->cmap->N / bs;
2719 
2720   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2721   b->rstartbs = B->rmap->rstart / bs;
2722   b->rendbs   = B->rmap->rend / bs;
2723   b->cstartbs = B->cmap->rstart / bs;
2724   b->cendbs   = B->cmap->rend / bs;
2725 
2726 #if defined(PETSC_USE_CTABLE)
2727   PetscCall(PetscHMapIDestroy(&b->colmap));
2728 #else
2729   PetscCall(PetscFree(b->colmap));
2730 #endif
2731   PetscCall(PetscFree(b->garray));
2732   PetscCall(VecDestroy(&b->lvec));
2733   PetscCall(VecScatterDestroy(&b->Mvctx));
2734 
2735   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2736 
2737   MatSeqXAIJGetOptions_Private(b->B);
2738   PetscCall(MatDestroy(&b->B));
2739   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2740   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2741   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2742   MatSeqXAIJRestoreOptions_Private(b->B);
2743 
2744   MatSeqXAIJGetOptions_Private(b->A);
2745   PetscCall(MatDestroy(&b->A));
2746   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2747   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2748   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2749   MatSeqXAIJRestoreOptions_Private(b->A);
2750 
2751   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2752   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2753   B->preallocated  = PETSC_TRUE;
2754   B->was_assembled = PETSC_FALSE;
2755   B->assembled     = PETSC_FALSE;
2756   PetscFunctionReturn(PETSC_SUCCESS);
2757 }
2758 
2759 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2760 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2761 
2762 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2763 {
2764   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2765   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2766   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2767   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2768 
2769   PetscFunctionBegin;
2770   PetscCall(PetscMalloc1(M + 1, &ii));
2771   ii[0] = 0;
2772   for (i = 0; i < M; i++) {
2773     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2774     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2775     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2776     /* remove one from count of matrix has diagonal */
2777     for (j = id[i]; j < id[i + 1]; j++) {
2778       if (jd[j] == i) {
2779         ii[i + 1]--;
2780         break;
2781       }
2782     }
2783   }
2784   PetscCall(PetscMalloc1(ii[M], &jj));
2785   cnt = 0;
2786   for (i = 0; i < M; i++) {
2787     for (j = io[i]; j < io[i + 1]; j++) {
2788       if (garray[jo[j]] > rstart) break;
2789       jj[cnt++] = garray[jo[j]];
2790     }
2791     for (k = id[i]; k < id[i + 1]; k++) {
2792       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2793     }
2794     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2795   }
2796   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2797   PetscFunctionReturn(PETSC_SUCCESS);
2798 }
2799 
2800 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2801 
2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2803 
2804 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2805 {
2806   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2807   Mat_MPIAIJ  *b;
2808   Mat          B;
2809 
2810   PetscFunctionBegin;
2811   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2812 
2813   if (reuse == MAT_REUSE_MATRIX) {
2814     B = *newmat;
2815   } else {
2816     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2817     PetscCall(MatSetType(B, MATMPIAIJ));
2818     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2819     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2820     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2821     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2822   }
2823   b = (Mat_MPIAIJ *)B->data;
2824 
2825   if (reuse == MAT_REUSE_MATRIX) {
2826     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2827     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2828   } else {
2829     PetscInt   *garray = a->garray;
2830     Mat_SeqAIJ *bB;
2831     PetscInt    bs, nnz;
2832     PetscCall(MatDestroy(&b->A));
2833     PetscCall(MatDestroy(&b->B));
2834     /* just clear out the data structure */
2835     PetscCall(MatDisAssemble_MPIAIJ(B));
2836     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2837     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2838 
2839     /* Global numbering for b->B columns */
2840     bB  = (Mat_SeqAIJ *)b->B->data;
2841     bs  = A->rmap->bs;
2842     nnz = bB->i[A->rmap->n];
2843     for (PetscInt k = 0; k < nnz; k++) {
2844       PetscInt bj = bB->j[k] / bs;
2845       PetscInt br = bB->j[k] % bs;
2846       bB->j[k]    = garray[bj] * bs + br;
2847     }
2848   }
2849   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2850   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2851 
2852   if (reuse == MAT_INPLACE_MATRIX) {
2853     PetscCall(MatHeaderReplace(A, &B));
2854   } else {
2855     *newmat = B;
2856   }
2857   PetscFunctionReturn(PETSC_SUCCESS);
2858 }
2859 
2860 /*MC
2861    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2862 
2863    Options Database Keys:
2864 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2865 . -mat_block_size <bs> - set the blocksize used to store the matrix
2866 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2867 - -mat_use_hash_table <fact> - set hash table factor
2868 
2869    Level: beginner
2870 
2871    Note:
2872     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2873     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2874 
2875 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2876 M*/
2877 
2878 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2879 
2880 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2881 {
2882   Mat_MPIBAIJ *b;
2883   PetscBool    flg = PETSC_FALSE;
2884 
2885   PetscFunctionBegin;
2886   PetscCall(PetscNew(&b));
2887   B->data      = (void *)b;
2888   B->ops[0]    = MatOps_Values;
2889   B->assembled = PETSC_FALSE;
2890 
2891   B->insertmode = NOT_SET_VALUES;
2892   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2893   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2894 
2895   /* build local table of row and column ownerships */
2896   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2897 
2898   /* build cache for off array entries formed */
2899   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2900 
2901   b->donotstash  = PETSC_FALSE;
2902   b->colmap      = NULL;
2903   b->garray      = NULL;
2904   b->roworiented = PETSC_TRUE;
2905 
2906   /* stuff used in block assembly */
2907   b->barray = NULL;
2908 
2909   /* stuff used for matrix vector multiply */
2910   b->lvec  = NULL;
2911   b->Mvctx = NULL;
2912 
2913   /* stuff for MatGetRow() */
2914   b->rowindices   = NULL;
2915   b->rowvalues    = NULL;
2916   b->getrowactive = PETSC_FALSE;
2917 
2918   /* hash table stuff */
2919   b->ht           = NULL;
2920   b->hd           = NULL;
2921   b->ht_size      = 0;
2922   b->ht_flag      = PETSC_FALSE;
2923   b->ht_fact      = 0;
2924   b->ht_total_ct  = 0;
2925   b->ht_insert_ct = 0;
2926 
2927   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2928   b->ijonly = PETSC_FALSE;
2929 
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2933 #if defined(PETSC_HAVE_HYPRE)
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2935 #endif
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2943   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2944 
2945   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2946   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2947   if (flg) {
2948     PetscReal fact = 1.39;
2949     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2950     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2951     if (fact <= 1.0) fact = 1.39;
2952     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2953     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2954   }
2955   PetscOptionsEnd();
2956   PetscFunctionReturn(PETSC_SUCCESS);
2957 }
2958 
2959 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2960 /*MC
2961    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2962 
2963    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2964    and `MATMPIBAIJ` otherwise.
2965 
2966    Options Database Keys:
2967 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2968 
2969   Level: beginner
2970 
2971 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2972 M*/
2973 
2974 /*@
2975   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2976   (block compressed row).
2977 
2978   Collective
2979 
2980   Input Parameters:
2981 + B     - the matrix
2982 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2983           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2984 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2985            submatrix  (same for all local rows)
2986 . d_nnz - array containing the number of block nonzeros in the various block rows
2987            of the in diagonal portion of the local (possibly different for each block
2988            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2989            set it even if it is zero.
2990 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2991            submatrix (same for all local rows).
2992 - o_nnz - array containing the number of nonzeros in the various block rows of the
2993            off-diagonal portion of the local submatrix (possibly different for
2994            each block row) or `NULL`.
2995 
2996    If the *_nnz parameter is given then the *_nz parameter is ignored
2997 
2998   Options Database Keys:
2999 + -mat_block_size            - size of the blocks to use
3000 - -mat_use_hash_table <fact> - set hash table factor
3001 
3002   Level: intermediate
3003 
3004   Notes:
3005   For good matrix assembly performance
3006   the user should preallocate the matrix storage by setting the parameters
3007   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3008   performance can be increased by more than a factor of 50.
3009 
3010   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3011   than it must be used on all processors that share the object for that argument.
3012 
3013   Storage Information:
3014   For a square global matrix we define each processor's diagonal portion
3015   to be its local rows and the corresponding columns (a square submatrix);
3016   each processor's off-diagonal portion encompasses the remainder of the
3017   local matrix (a rectangular submatrix).
3018 
3019   The user can specify preallocated storage for the diagonal part of
3020   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3021   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3022   memory allocation.  Likewise, specify preallocated storage for the
3023   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3024 
3025   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3026   the figure below we depict these three local rows and all columns (0-11).
3027 
3028 .vb
3029            0 1 2 3 4 5 6 7 8 9 10 11
3030           --------------------------
3031    row 3  |o o o d d d o o o o  o  o
3032    row 4  |o o o d d d o o o o  o  o
3033    row 5  |o o o d d d o o o o  o  o
3034           --------------------------
3035 .ve
3036 
3037   Thus, any entries in the d locations are stored in the d (diagonal)
3038   submatrix, and any entries in the o locations are stored in the
3039   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3040   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3041 
3042   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3043   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3044   In general, for PDE problems in which most nonzeros are near the diagonal,
3045   one expects `d_nz` >> `o_nz`.
3046 
3047   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3048   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3049   You can also run with the option `-info` and look for messages with the string
3050   malloc in them to see if additional memory allocation was needed.
3051 
3052 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3053 @*/
3054 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3055 {
3056   PetscFunctionBegin;
3057   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3058   PetscValidType(B, 1);
3059   PetscValidLogicalCollectiveInt(B, bs, 2);
3060   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3061   PetscFunctionReturn(PETSC_SUCCESS);
3062 }
3063 
3064 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3065 /*@
3066   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3067   (block compressed row).
3068 
3069   Collective
3070 
3071   Input Parameters:
3072 + comm  - MPI communicator
3073 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3074           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3075 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3076           This value should be the same as the local size used in creating the
3077           y vector for the matrix-vector product y = Ax.
3078 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3079           This value should be the same as the local size used in creating the
3080           x vector for the matrix-vector product y = Ax.
3081 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3082 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3083 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3084           submatrix  (same for all local rows)
3085 . d_nnz - array containing the number of nonzero blocks in the various block rows
3086           of the in diagonal portion of the local (possibly different for each block
3087           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3088           and set it even if it is zero.
3089 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3090           submatrix (same for all local rows).
3091 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3092           off-diagonal portion of the local submatrix (possibly different for
3093           each block row) or NULL.
3094 
3095   Output Parameter:
3096 . A - the matrix
3097 
3098   Options Database Keys:
3099 + -mat_block_size            - size of the blocks to use
3100 - -mat_use_hash_table <fact> - set hash table factor
3101 
3102   Level: intermediate
3103 
3104   Notes:
3105   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3106   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3107   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3108 
3109   For good matrix assembly performance
3110   the user should preallocate the matrix storage by setting the parameters
3111   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3112   performance can be increased by more than a factor of 50.
3113 
3114   If the *_nnz parameter is given then the *_nz parameter is ignored
3115 
3116   A nonzero block is any block that as 1 or more nonzeros in it
3117 
3118   The user MUST specify either the local or global matrix dimensions
3119   (possibly both).
3120 
3121   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3122   than it must be used on all processors that share the object for that argument.
3123 
3124   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3125   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3126 
3127   Storage Information:
3128   For a square global matrix we define each processor's diagonal portion
3129   to be its local rows and the corresponding columns (a square submatrix);
3130   each processor's off-diagonal portion encompasses the remainder of the
3131   local matrix (a rectangular submatrix).
3132 
3133   The user can specify preallocated storage for the diagonal part of
3134   the local submatrix with either d_nz or d_nnz (not both).  Set
3135   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3136   memory allocation.  Likewise, specify preallocated storage for the
3137   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3138 
3139   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3140   the figure below we depict these three local rows and all columns (0-11).
3141 
3142 .vb
3143            0 1 2 3 4 5 6 7 8 9 10 11
3144           --------------------------
3145    row 3  |o o o d d d o o o o  o  o
3146    row 4  |o o o d d d o o o o  o  o
3147    row 5  |o o o d d d o o o o  o  o
3148           --------------------------
3149 .ve
3150 
3151   Thus, any entries in the d locations are stored in the d (diagonal)
3152   submatrix, and any entries in the o locations are stored in the
3153   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3154   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3155 
3156   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3157   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3158   In general, for PDE problems in which most nonzeros are near the diagonal,
3159   one expects `d_nz` >> `o_nz`.
3160 
3161 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3162           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3163 @*/
3164 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3165 {
3166   PetscMPIInt size;
3167 
3168   PetscFunctionBegin;
3169   PetscCall(MatCreate(comm, A));
3170   PetscCall(MatSetSizes(*A, m, n, M, N));
3171   PetscCallMPI(MPI_Comm_size(comm, &size));
3172   if (size > 1) {
3173     PetscCall(MatSetType(*A, MATMPIBAIJ));
3174     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3175   } else {
3176     PetscCall(MatSetType(*A, MATSEQBAIJ));
3177     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3178   }
3179   PetscFunctionReturn(PETSC_SUCCESS);
3180 }
3181 
3182 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3183 {
3184   Mat          mat;
3185   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3186   PetscInt     len = 0;
3187 
3188   PetscFunctionBegin;
3189   *newmat = NULL;
3190   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3191   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3192   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3193 
3194   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3195   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3196   if (matin->hash_active) {
3197     PetscCall(MatSetUp(mat));
3198   } else {
3199     mat->factortype   = matin->factortype;
3200     mat->preallocated = PETSC_TRUE;
3201     mat->assembled    = PETSC_TRUE;
3202     mat->insertmode   = NOT_SET_VALUES;
3203 
3204     a             = (Mat_MPIBAIJ *)mat->data;
3205     mat->rmap->bs = matin->rmap->bs;
3206     a->bs2        = oldmat->bs2;
3207     a->mbs        = oldmat->mbs;
3208     a->nbs        = oldmat->nbs;
3209     a->Mbs        = oldmat->Mbs;
3210     a->Nbs        = oldmat->Nbs;
3211 
3212     a->size         = oldmat->size;
3213     a->rank         = oldmat->rank;
3214     a->donotstash   = oldmat->donotstash;
3215     a->roworiented  = oldmat->roworiented;
3216     a->rowindices   = NULL;
3217     a->rowvalues    = NULL;
3218     a->getrowactive = PETSC_FALSE;
3219     a->barray       = NULL;
3220     a->rstartbs     = oldmat->rstartbs;
3221     a->rendbs       = oldmat->rendbs;
3222     a->cstartbs     = oldmat->cstartbs;
3223     a->cendbs       = oldmat->cendbs;
3224 
3225     /* hash table stuff */
3226     a->ht           = NULL;
3227     a->hd           = NULL;
3228     a->ht_size      = 0;
3229     a->ht_flag      = oldmat->ht_flag;
3230     a->ht_fact      = oldmat->ht_fact;
3231     a->ht_total_ct  = 0;
3232     a->ht_insert_ct = 0;
3233 
3234     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3235     if (oldmat->colmap) {
3236 #if defined(PETSC_USE_CTABLE)
3237       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3238 #else
3239       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3240       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3241 #endif
3242     } else a->colmap = NULL;
3243 
3244     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3245       PetscCall(PetscMalloc1(len, &a->garray));
3246       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3247     } else a->garray = NULL;
3248 
3249     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3250     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3251     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3252 
3253     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3254     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3255   }
3256   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3257   *newmat = mat;
3258   PetscFunctionReturn(PETSC_SUCCESS);
3259 }
3260 
3261 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3262 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3263 {
3264   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3265   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3266   PetscScalar *matvals;
3267 
3268   PetscFunctionBegin;
3269   PetscCall(PetscViewerSetUp(viewer));
3270 
3271   /* read in matrix header */
3272   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3273   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3274   M  = header[1];
3275   N  = header[2];
3276   nz = header[3];
3277   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3278   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3279   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3280 
3281   /* set block sizes from the viewer's .info file */
3282   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3283   /* set local sizes if not set already */
3284   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3285   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3286   /* set global sizes if not set already */
3287   if (mat->rmap->N < 0) mat->rmap->N = M;
3288   if (mat->cmap->N < 0) mat->cmap->N = N;
3289   PetscCall(PetscLayoutSetUp(mat->rmap));
3290   PetscCall(PetscLayoutSetUp(mat->cmap));
3291 
3292   /* check if the matrix sizes are correct */
3293   PetscCall(MatGetSize(mat, &rows, &cols));
3294   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3295   PetscCall(MatGetBlockSize(mat, &bs));
3296   PetscCall(MatGetLocalSize(mat, &m, &n));
3297   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3298   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3299   mbs = m / bs;
3300   nbs = n / bs;
3301 
3302   /* read in row lengths and build row indices */
3303   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3304   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3305   rowidxs[0] = 0;
3306   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3307   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3308   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3309 
3310   /* read in column indices and matrix values */
3311   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3312   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3313   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3314 
3315   {                /* preallocate matrix storage */
3316     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3317     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3318     PetscBool  sbaij, done;
3319     PetscInt  *d_nnz, *o_nnz;
3320 
3321     PetscCall(PetscBTCreate(nbs, &bt));
3322     PetscCall(PetscHSetICreate(&ht));
3323     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3324     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3325     for (i = 0; i < mbs; i++) {
3326       PetscCall(PetscBTMemzero(nbs, bt));
3327       PetscCall(PetscHSetIClear(ht));
3328       for (k = 0; k < bs; k++) {
3329         PetscInt row = bs * i + k;
3330         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3331           PetscInt col = colidxs[j];
3332           if (!sbaij || col >= row) {
3333             if (col >= cs && col < ce) {
3334               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3335             } else {
3336               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3337               if (done) o_nnz[i]++;
3338             }
3339           }
3340         }
3341       }
3342     }
3343     PetscCall(PetscBTDestroy(&bt));
3344     PetscCall(PetscHSetIDestroy(&ht));
3345     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3346     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3347     PetscCall(PetscFree2(d_nnz, o_nnz));
3348   }
3349 
3350   /* store matrix values */
3351   for (i = 0; i < m; i++) {
3352     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3353     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3354   }
3355 
3356   PetscCall(PetscFree(rowidxs));
3357   PetscCall(PetscFree2(colidxs, matvals));
3358   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3359   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3360   PetscFunctionReturn(PETSC_SUCCESS);
3361 }
3362 
3363 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3364 {
3365   PetscBool isbinary;
3366 
3367   PetscFunctionBegin;
3368   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3369   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3370   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3371   PetscFunctionReturn(PETSC_SUCCESS);
3372 }
3373 
3374 /*@
3375   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3376 
3377   Input Parameters:
3378 + mat  - the matrix
3379 - fact - factor
3380 
3381   Options Database Key:
3382 . -mat_use_hash_table <fact> - provide the factor
3383 
3384   Level: advanced
3385 
3386 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3387 @*/
3388 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3389 {
3390   PetscFunctionBegin;
3391   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3392   PetscFunctionReturn(PETSC_SUCCESS);
3393 }
3394 
3395 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3396 {
3397   Mat_MPIBAIJ *baij;
3398 
3399   PetscFunctionBegin;
3400   baij          = (Mat_MPIBAIJ *)mat->data;
3401   baij->ht_fact = fact;
3402   PetscFunctionReturn(PETSC_SUCCESS);
3403 }
3404 
3405 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3406 {
3407   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3408   PetscBool    flg;
3409 
3410   PetscFunctionBegin;
3411   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3412   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3413   if (Ad) *Ad = a->A;
3414   if (Ao) *Ao = a->B;
3415   if (colmap) *colmap = a->garray;
3416   PetscFunctionReturn(PETSC_SUCCESS);
3417 }
3418 
3419 /*
3420     Special version for direct calls from Fortran (to eliminate two function call overheads
3421 */
3422 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3423   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3424 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3425   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3426 #endif
3427 
3428 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3429 /*@C
3430   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3431 
3432   Collective
3433 
3434   Input Parameters:
3435 + matin  - the matrix
3436 . min    - number of input rows
3437 . im     - input rows
3438 . nin    - number of input columns
3439 . in     - input columns
3440 . v      - numerical values input
3441 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3442 
3443   Level: advanced
3444 
3445   Developer Notes:
3446   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3447 
3448 .seealso: `Mat`, `MatSetValuesBlocked()`
3449 @*/
3450 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3451 {
3452   /* convert input arguments to C version */
3453   Mat        mat = *matin;
3454   PetscInt   m = *min, n = *nin;
3455   InsertMode addv = *addvin;
3456 
3457   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3458   const MatScalar *value;
3459   MatScalar       *barray      = baij->barray;
3460   PetscBool        roworiented = baij->roworiented;
3461   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3462   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3463   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3464 
3465   PetscFunctionBegin;
3466   /* tasks normally handled by MatSetValuesBlocked() */
3467   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3468   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3469   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3470   if (mat->assembled) {
3471     mat->was_assembled = PETSC_TRUE;
3472     mat->assembled     = PETSC_FALSE;
3473   }
3474   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3475 
3476   if (!barray) {
3477     PetscCall(PetscMalloc1(bs2, &barray));
3478     baij->barray = barray;
3479   }
3480 
3481   if (roworiented) stepval = (n - 1) * bs;
3482   else stepval = (m - 1) * bs;
3483 
3484   for (i = 0; i < m; i++) {
3485     if (im[i] < 0) continue;
3486     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3487     if (im[i] >= rstart && im[i] < rend) {
3488       row = im[i] - rstart;
3489       for (j = 0; j < n; j++) {
3490         /* If NumCol = 1 then a copy is not required */
3491         if ((roworiented) && (n == 1)) {
3492           barray = (MatScalar *)v + i * bs2;
3493         } else if ((!roworiented) && (m == 1)) {
3494           barray = (MatScalar *)v + j * bs2;
3495         } else { /* Here a copy is required */
3496           if (roworiented) {
3497             value = v + i * (stepval + bs) * bs + j * bs;
3498           } else {
3499             value = v + j * (stepval + bs) * bs + i * bs;
3500           }
3501           for (ii = 0; ii < bs; ii++, value += stepval) {
3502             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3503           }
3504           barray -= bs2;
3505         }
3506 
3507         if (in[j] >= cstart && in[j] < cend) {
3508           col = in[j] - cstart;
3509           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3510         } else if (in[j] < 0) {
3511           continue;
3512         } else {
3513           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3514           if (mat->was_assembled) {
3515             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3516 
3517 #if defined(PETSC_USE_DEBUG)
3518   #if defined(PETSC_USE_CTABLE)
3519             {
3520               PetscInt data;
3521               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3522               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3523             }
3524   #else
3525             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3526   #endif
3527 #endif
3528 #if defined(PETSC_USE_CTABLE)
3529             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3530             col = (col - 1) / bs;
3531 #else
3532             col = (baij->colmap[in[j]] - 1) / bs;
3533 #endif
3534             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3535               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3536               col = in[j];
3537             }
3538           } else col = in[j];
3539           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3540         }
3541       }
3542     } else {
3543       if (!baij->donotstash) {
3544         if (roworiented) {
3545           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3546         } else {
3547           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3548         }
3549       }
3550     }
3551   }
3552 
3553   /* task normally handled by MatSetValuesBlocked() */
3554   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3555   PetscFunctionReturn(PETSC_SUCCESS);
3556 }
3557 
3558 /*@
3559   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3560 
3561   Collective
3562 
3563   Input Parameters:
3564 + comm - MPI communicator
3565 . bs   - the block size, only a block size of 1 is supported
3566 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3567 . n    - This value should be the same as the local size used in creating the
3568          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3569          calculated if `N` is given) For square matrices `n` is almost always `m`.
3570 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3571 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3572 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3573 . j    - column indices
3574 - a    - matrix values
3575 
3576   Output Parameter:
3577 . mat - the matrix
3578 
3579   Level: intermediate
3580 
3581   Notes:
3582   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3583   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3584   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3585 
3586   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3587   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3588   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3589   with column-major ordering within blocks.
3590 
3591   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3592 
3593 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3594           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3595 @*/
3596 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3597 {
3598   PetscFunctionBegin;
3599   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3600   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3601   PetscCall(MatCreate(comm, mat));
3602   PetscCall(MatSetSizes(*mat, m, n, M, N));
3603   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3604   PetscCall(MatSetBlockSize(*mat, bs));
3605   PetscCall(MatSetUp(*mat));
3606   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3607   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3608   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3609   PetscFunctionReturn(PETSC_SUCCESS);
3610 }
3611 
3612 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3613 {
3614   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3615   PetscInt    *indx;
3616   PetscScalar *values;
3617 
3618   PetscFunctionBegin;
3619   PetscCall(MatGetSize(inmat, &m, &N));
3620   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3621     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3622     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3623     PetscInt    *bindx, rmax = a->rmax, j;
3624     PetscMPIInt  rank, size;
3625 
3626     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3627     mbs = m / bs;
3628     Nbs = N / cbs;
3629     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3630     nbs = n / cbs;
3631 
3632     PetscCall(PetscMalloc1(rmax, &bindx));
3633     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3634 
3635     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3636     PetscCallMPI(MPI_Comm_rank(comm, &size));
3637     if (rank == size - 1) {
3638       /* Check sum(nbs) = Nbs */
3639       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3640     }
3641 
3642     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3643     for (i = 0; i < mbs; i++) {
3644       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3645       nnz = nnz / bs;
3646       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3647       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3648       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3649     }
3650     PetscCall(PetscFree(bindx));
3651 
3652     PetscCall(MatCreate(comm, outmat));
3653     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3654     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3655     PetscCall(MatSetType(*outmat, MATBAIJ));
3656     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3657     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3658     MatPreallocateEnd(dnz, onz);
3659     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3660   }
3661 
3662   /* numeric phase */
3663   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3664   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3665 
3666   for (i = 0; i < m; i++) {
3667     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3668     Ii = i + rstart;
3669     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3670     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3671   }
3672   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3673   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3674   PetscFunctionReturn(PETSC_SUCCESS);
3675 }
3676