xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c56ceb406e7f1e4076bf88d4e7444df54c64fc7a)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
/* Destructor for MATMPIBAIJ: frees the two sequential blocks, the
   communication machinery, and detaches all composed methods. */
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* Record the final dimensions for object-state logging before teardown */
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  /* Stashes that accumulate off-process entries (point and block variants) */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* Diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  /* garray maps B's compressed block columns back to global block columns */
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  /* Work arrays used by MatGetRow() and blocked MatSetValues() */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  /* Hash-table storage used by the *_HT MatSetValues variants */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* Detach every composed method so a subsequent MatSetType() starts clean */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
  /* colmap maps a global block column to 1 + (point offset of that block in B's
     compressed column space); the +1 lets 0 (hash miss / zeroed array entry)
     mean "column not present in B" */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense variant: one entry per global block column, zero-initialized */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
142 
/*
  Inserts or adds one scalar at local point position (row, col) of the
  DIAGONAL block A (a = (Mat_SeqBAIJ*)A->data).  orow/ocol are the global
  indices, used only in error messages.  Searches block row `brow` for block
  column `bcol` (binary search narrowed to a short linear scan); if the block
  is absent and a->nonew allows it, shifts later entries up and inserts a
  zeroed bs x bs block via MatSeqXAIJReallocateAIJ.  Relies on the caller
  declaring rp, ap, bap, rmax, nrow, brow, bcol, ridx, cidx, low, high, t,
  _i, N and the a* alias variables.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
182 
/*
  Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but for the
  OFF-DIAGONAL block B (b = (Mat_SeqBAIJ*)B->data) using the b* alias
  variables (bimax, bi, bilen, bj, ba) and the b_noinsert label.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
222 
/* Point-wise MatSetValues for MATMPIBAIJ: local entries go directly into the
   diagonal (A) or off-diagonal (B) sequential block via the macros above;
   rows owned by other processes are stashed for communication at assembly. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B is compressed: translate the global column via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores offset+1; -1 => block not in B */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block: fall back to uncompressed B */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* point offset inside the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
308 
/* Inserts or adds a single bs x bs block of values v at block position
   (row, col) of a SeqBAIJ matrix.  row/col are local block indices;
   orow/ocol are global block indices used only for error messages.
   v is read row-major when a->roworiented, column-major otherwise; storage
   inside the matrix is column-major within each block. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v; /* (redundant re-initialization kept from original) */
  low   = 0;
  high  = nrow;
  /* binary search narrowed to a short linear scan for block column `col` */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: add/insert values with the proper orientation */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
386 /*
387     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389 */
/* Blocked MatSetValues for MATMPIBAIJ: im[]/in[] are global BLOCK indices and
   v holds m*n blocks of bs*bs values.  Each block is copied (or aliased, when
   no copy is needed) into barray and forwarded to the Inlined routine for
   the diagonal (A) or off-diagonal (B) part; off-process block rows are
   stashed for assembly-time communication. */
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs staging buffer, cached on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* alias directly into v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are silently ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* B is compressed after assembly: translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* point offset -> block offset; miss => col < 0 */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block: fall back to uncompressed B */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
481 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio; HASH maps `key` to a slot in [0, size) using the fractional part of
   key*HASH_KEY.  `tmp` is a caller-supplied PetscReal scratch variable. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* Hash-table variant of point-wise MatSetValues: locates the bs*bs block that
   contains (row, col) by probing the precomputed open-addressed table baij->ht
   (linear probing with wrap-around) and writes directly into the block's
   values via baij->hd.  The structure must already contain the block; new
   nonzeros cannot be inserted here. */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  /* probe-statistics counters, seeded from the running totals */
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up PetscInto the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1; /* 1-based so 0 marks empty slots */
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          /* linear probe from h1 to end, then wrap to the front */
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    /* NOTE(review): total_ct/insert_ct were seeded from these running totals
       above, so "+=" appears to double-count prior totals; the counters are
       only used for statistics reporting — confirm intent */
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
554 
/* Hash-table variant of blocked MatSetValues: im[]/in[] are global BLOCK
   indices; each bs*bs block is located by probing baij->ht (linear probing
   with wrap-around) and its values are written in place through baij->hd.
   The structure must already contain every referenced block. */
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rend comes from the point-index row map while rstart and
     the incoming im[] are in block-row units — looks inconsistent; confirm */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  /* probe-statistics counters, seeded from the running totals */
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* distance between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1; /* 1-based so 0 marks empty slots */
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          /* linear probe from h1 to end, then wrap to the front */
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* start of the bs*bs block's values */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    /* NOTE(review): counters were seeded from these running totals above, so
       "+=" appears to double-count — statistics only; confirm intent */
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
653 
/* Retrieves entries (idxm[i], idxn[j]) into v (row-major, m x n).  Only rows
   owned by this process may be requested; columns outside the diagonal block
   are translated through colmap, and entries not stored in B come back 0.0. */
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column through colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores offset+1; -1 => block not present */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent block (or stale colmap entry) reads as an explicit zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
691 
/*
  MatNorm_MPIBAIJ - Computes the Frobenius, 1- (max column sum), or infinity
  (max row sum) norm of the parallel matrix by combining the local diagonal
  block A and off-diagonal block B and reducing over all ranks.
*/
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) { /* uniprocessor case: everything is stored in A, delegate */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar entry of both blocks, then global sum and sqrt */
      v  = amat->a;
      nz = amat->nz * bs2; /* nz counts blocks; each block holds bs2 scalars */
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* accumulate |a_ij| into a global-number-of-columns array, sum across ranks, take the max.
         NOTE(review): cstart is taken from rstartbs as the diagonal block's column offset, which
         assumes the row and column block starts coincide -- confirm for non-square layouts */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* garray maps the local off-diagonal block column to global */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* per block-row: sum |a_ij| of the bs scalar rows over both blocks, track the running max */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j]; /* first block of block-row j in A */
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j]; /* first block of block-row j in B */
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* local row sums are complete for locally owned rows, so a global MAX suffices */
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entried need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
/*
  MatAssemblyEnd_MPIBAIJ - Receives the stashed off-process entries started in
  MatAssemblyBegin_MPIBAIJ(), inserts them, assembles the two sequential blocks,
  handles possible disassembly of the off-diagonal block, and (optionally)
  installs the hash-table fast path for MatSetValues.
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain incoming scalar-stash messages until every sender is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                             = PETSC_FALSE;
    a->roworiented                                = PETSC_FALSE;
    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed item is a full bs x bs block, hence the i*bs2 value offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the row/column orientation flags saved above */
    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first assembly: build the ghost-column map and communication scatter for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once fully assembled, optionally switch MatSetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space for MatGetRow is invalidated by assembly; it is re-allocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1006 
1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1008 #include <petscdraw.h>
/*
  MatView_MPIBAIJ_ASCIIorDraworSocket - Views the matrix on ASCII, draw, or socket
  viewers. Info formats print per-rank statistics; otherwise the whole matrix is
  gathered onto rank 0 (as a temporary MPIBAIJ with all rows on rank 0) and viewed
  there. Collective: every rank must participate in the gather and subviewer calls.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary: sizes, nonzero counts, memory, and the scatter */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy; other ranks own none */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global scalar-row indices of block-row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* blocks are stored column-major: each pass inserts one scalar column of the block */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* garray maps local off-diagonal block column to global */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* the gathered copy's diagonal block holds the whole matrix on rank 0 */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  MatView_MPIBAIJ_Binary - Writes the matrix in the PETSc binary format:
  [MAT_FILE_CLASSID, M, N, nnz] header, then per-scalar-row lengths, then
  column indices, then values. Each scalar row is emitted with its columns in
  ascending order: off-diagonal columns left of the diagonal block first, then
  the diagonal block, then the remaining off-diagonal columns.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzeros; 64-bit to avoid overflow */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); /* only rank 0 writes, so only it needs header[3] */
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    /* all bs scalar rows of a block row share the same length */
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B columns left of the diagonal block (garray is sorted, so a single break suffices) */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* then the diagonal block (A) columns, shifted to global numbering */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* then the remaining B columns right of the diagonal block */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same traversal order as the column indices above; blocks are stored
         column-major, hence the bs*(bs*jb + l) + k addressing for scalar row k */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1209 
1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1211 {
1212   PetscBool iascii, isdraw, issocket, isbinary;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1219   if (iascii || isdraw || issocket) {
1220     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1221   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1222   PetscFunctionReturn(PETSC_SUCCESS);
1223 }
1224 
1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1226 {
1227   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1228   PetscInt     nt;
1229 
1230   PetscFunctionBegin;
1231   PetscCall(VecGetLocalSize(xx, &nt));
1232   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1233   PetscCall(VecGetLocalSize(yy, &nt));
1234   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1235   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1236   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1237   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1238   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1243 {
1244   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1249   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1251   PetscFunctionReturn(PETSC_SUCCESS);
1252 }
1253 
1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1257 
1258   PetscFunctionBegin;
1259   /* do nondiagonal part */
1260   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1261   /* do local part */
1262   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1263   /* add partial results together */
1264   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1265   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1270 {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272 
1273   PetscFunctionBegin;
1274   /* do nondiagonal part */
1275   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1276   /* do local part */
1277   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1278   /* add partial results together */
1279   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1280   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 /*
1285   This only works correctly for square matrices where the subblock A->A is the
1286    diagonal block
1287 */
1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1289 {
1290   PetscFunctionBegin;
1291   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1292   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1297 {
1298   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(MatScale(a->A, aa));
1302   PetscCall(MatScale(a->B, aa));
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
/*
  MatGetRow_MPIBAIJ - Returns the nonzeros of one locally owned (global) row,
  with global column indices in ascending order, by merging the rows of the
  diagonal block A and off-diagonal block B. Must be paired with
  MatRestoreRow_MPIBAIJ(); only one row may be active at a time.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    /* scratch buffers are kept on the Mat and freed at the next assembly */
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* pass NULL to the block getrow for any output the caller did not request;
     note B's column indices are still needed when values are requested, to
     place the off-diagonal values correctly in the sorted merge below */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so determine imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1388 
1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1390 {
1391   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1395   baij->getrowactive = PETSC_FALSE;
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1400 {
1401   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1402 
1403   PetscFunctionBegin;
1404   PetscCall(MatZeroEntries(l->A));
1405   PetscCall(MatZeroEntries(l->B));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1410 {
1411   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1412   Mat            A = a->A, B = a->B;
1413   PetscLogDouble isend[5], irecv[5];
1414 
1415   PetscFunctionBegin;
1416   info->block_size = (PetscReal)matin->rmap->bs;
1417 
1418   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1419 
1420   isend[0] = info->nz_used;
1421   isend[1] = info->nz_allocated;
1422   isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;
1424   isend[4] = info->mallocs;
1425 
1426   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1427 
1428   isend[0] += info->nz_used;
1429   isend[1] += info->nz_allocated;
1430   isend[2] += info->nz_unneeded;
1431   isend[3] += info->memory;
1432   isend[4] += info->mallocs;
1433 
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else if (flag == MAT_GLOBAL_SUM) {
1449     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1450 
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
/*
  MatSetOption_MPIBAIJ - Sets matrix options, forwarding per-block options to the
  sequential diagonal (A) and off-diagonal (B) blocks, and recording the
  communication/hash-table options on the parallel wrapper itself.
*/
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply to the nonzero structure: forward to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* recorded locally too, since the parallel MatSetValues inspects it */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing; assembly then skips all communication */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built at final assembly; 1.39 is the default fill factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1510 
/* Transpose the parallel BAIJ matrix A.  The transpose is formed by walking the
   local diagonal (baij->A) and off-diagonal (baij->B) blocks and, for each
   stored bs x bs block, inserting bs values at a time with the roles of row and
   column swapped: the global column index of A becomes the row index in B and
   the bs global row indices of A (rvals[]) become the columns.
   MAT_INPLACE_MATRIX replaces A itself via MatHeaderMerge(). */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with swapped local and global dimensions */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals[] = the bs consecutive global point-row indices of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global point column of the first column in this block */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert bs block entries into row 'col' of B with columns rvals[]: transposed placement */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps the compressed off-diagonal block column to its global block index */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* hand the result back, or fold it into A for the in-place variant */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1577 
1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1579 {
1580   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1581   Mat          a = baij->A, b = baij->B;
1582   PetscInt     s1, s2, s3;
1583 
1584   PetscFunctionBegin;
1585   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1586   if (rr) {
1587     PetscCall(VecGetLocalSize(rr, &s1));
1588     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1589     /* Overlap communication with computation. */
1590     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1591   }
1592   if (ll) {
1593     PetscCall(VecGetLocalSize(ll, &s1));
1594     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1595     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1596   }
1597   /* scale  the diagonal block */
1598   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599 
1600   if (rr) {
1601     /* Do a scatter end and then right scale the off-diagonal block */
1602     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1603     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1604   }
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
/* Zero the listed global rows of the parallel matrix, optionally placing 'diag'
   on the diagonal of each zeroed row.  When both x and b are given, the
   right-hand side is fixed up so that b_i = diag * x_i for every zeroed row. */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows; /* locally owned rows to zero, in local point numbering */
  PetscInt     r, len;
  PetscBool    cong; /* PETSC_TRUE when the row and column layouts are congruent */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent layouts: the sequential routine sets the diagonal entries directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert the diagonal entries one by one,
       which requires that new nonzero locations are permitted */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* convert to global row index */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* assemble because the insertions above may have generated off-process stash traffic */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1665 
/* Zero the listed global rows AND the corresponding columns of A, placing
   'diag' on the diagonal of each zeroed row (delegated to the diagonal block).
   When x is provided, b is updated by subtracting the eliminated off-diagonal
   contributions (b_i -= a_ij * x_j for zeroed columns j). */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* lmask marks, in the ghosted local column space, which global columns are being zeroed */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are required for the right-hand-side update below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* point entries in this point row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* entries of one point row are bs apart inside the block storage */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs; /* block row containing point row i */
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k; /* point column in the ghosted local numbering */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the eliminated entry to the right-hand side */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1761 
1762 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1763 {
1764   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1765 
1766   PetscFunctionBegin;
1767   PetscCall(MatSetUnfactored(a->A));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1772 
1773 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1774 {
1775   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1776   Mat          a, b, c, d;
1777   PetscBool    flg;
1778 
1779   PetscFunctionBegin;
1780   a = matA->A;
1781   b = matA->B;
1782   c = matB->A;
1783   d = matB->B;
1784 
1785   PetscCall(MatEqual(a, c, &flg));
1786   if (flg) PetscCall(MatEqual(b, d, &flg));
1787   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1788   PetscFunctionReturn(PETSC_SUCCESS);
1789 }
1790 
1791 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1792 {
1793   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1794   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1795 
1796   PetscFunctionBegin;
1797   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1798   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1799     PetscCall(MatCopy_Basic(A, B, str));
1800   } else {
1801     PetscCall(MatCopy(a->A, b->A, str));
1802     PetscCall(MatCopy(a->B, b->B, str));
1803   }
1804   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
1808 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1809 {
1810   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1811   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1812   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1813 
1814   PetscFunctionBegin;
1815   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1816   PetscFunctionReturn(PETSC_SUCCESS);
1817 }
1818 
/* Compute Y = a*X + Y.
   - SAME_NONZERO_PATTERN:   BLAS axpy directly on the stored values of both blocks
   - SUBSET_NONZERO_PATTERN: generic implementation
   - otherwise:              preallocate a matrix for the union pattern, fill it,
                             and replace Y with it via MatHeaderMerge() */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs; /* scalars per bs x bs block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* diagonal blocks: y->a += alpha * x->a over nz blocks of bs2 scalars each */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    /* off-diagonal blocks, same operation */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    /* nnz_d/nnz_o: per-block-row counts of the union pattern for the diag/off-diag parts */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* B takes over Y's header so callers keep using the same Mat handle */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1861 
1862 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1863 {
1864   PetscFunctionBegin;
1865   if (PetscDefined(USE_COMPLEX)) {
1866     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1867 
1868     PetscCall(MatConjugate_SeqBAIJ(a->A));
1869     PetscCall(MatConjugate_SeqBAIJ(a->B));
1870   }
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1875 {
1876   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(MatRealPart(a->A));
1880   PetscCall(MatRealPart(a->B));
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1885 {
1886   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(MatImaginaryPart(a->A));
1890   PetscCall(MatImaginaryPart(a->B));
1891   PetscFunctionReturn(PETSC_SUCCESS);
1892 }
1893 
1894 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1895 {
1896   IS       iscol_local;
1897   PetscInt csize;
1898 
1899   PetscFunctionBegin;
1900   PetscCall(ISGetLocalSize(iscol, &csize));
1901   if (call == MAT_REUSE_MATRIX) {
1902     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1903     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1904   } else {
1905     PetscCall(ISAllGather(iscol, &iscol_local));
1906   }
1907   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1908   if (call == MAT_INITIAL_MATRIX) {
1909     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1910     PetscCall(ISDestroy(&iscol_local));
1911   }
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 /*
1916   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1917   in local and then by concatenating the local matrices the end result.
1918   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1919   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1920 */
/* Build the parallel submatrix mat(isrow, iscol).
     csize  - local number of (point) columns for the result, or PETSC_DECIDE
     call   - MAT_INITIAL_MATRIX, or MAT_REUSE_MATRIX to reuse the "SubMatrix"
              object composed on *newmat by a previous initial call
     sym    - PETSC_TRUE when invoked on behalf of the SBAIJ implementation
   The requested rows are first extracted into a sequential matrix (Mreuse) and
   then redistributed block-row by block-row into the final parallel matrix. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    /* same IS: share the compressed version instead of compressing twice */
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum the local column counts to get this process's column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns inside [rstart,rend) belong to the diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* NOTE(review): both BAIJ and SBAIJ preallocation are invoked; presumably the
       one not matching M's type is a no-op -- confirm */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values below are fed in column-major block order */
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    /* insert block row i of Mreuse as global block row rstart/bs + i of M */
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2049 
2050 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2051 {
2052   MPI_Comm        comm, pcomm;
2053   PetscInt        clocal_size, nrows;
2054   const PetscInt *rows;
2055   PetscMPIInt     size;
2056   IS              crowp, lcolp;
2057 
2058   PetscFunctionBegin;
2059   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2060   /* make a collective version of 'rowp' */
2061   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2062   if (pcomm == comm) {
2063     crowp = rowp;
2064   } else {
2065     PetscCall(ISGetSize(rowp, &nrows));
2066     PetscCall(ISGetIndices(rowp, &rows));
2067     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2068     PetscCall(ISRestoreIndices(rowp, &rows));
2069   }
2070   PetscCall(ISSetPermutation(crowp));
2071   /* make a local version of 'colp' */
2072   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2073   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2074   if (size == 1) {
2075     lcolp = colp;
2076   } else {
2077     PetscCall(ISAllGather(colp, &lcolp));
2078   }
2079   PetscCall(ISSetPermutation(lcolp));
2080   /* now we just get the submatrix */
2081   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2082   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2083   /* clean up */
2084   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2085   if (size > 1) PetscCall(ISDestroy(&lcolp));
2086   PetscFunctionReturn(PETSC_SUCCESS);
2087 }
2088 
2089 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2090 {
2091   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2092   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2093 
2094   PetscFunctionBegin;
2095   if (nghosts) *nghosts = B->nbs;
2096   if (ghosts) *ghosts = baij->garray;
2097   PetscFunctionReturn(PETSC_SUCCESS);
2098 }
2099 
/* Gather the block-level nonzero structure of the whole parallel matrix onto
   every process as a sequential AIJ matrix with one entry per bs x bs block
   (numerical values are not filled in). */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  /* lens[] = nonzero blocks per global block row (diagonal + off-diagonal parts) */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* single allocation holds both arrays */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  /* write directly into B's j array at this rank's block-row offset; entries are
     emitted in ascending global column order: lower off-diagonal, diagonal, upper */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  /* recvcounts/displs are now in units of nonzeros rather than rows */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2179 
/* SOR/relaxation for the parallel matrix.  Only the "local" variants
   (SOR_LOCAL_FORWARD/BACKWARD/SYMMETRIC_SWEEP and SOR_APPLY_UPPER) are
   supported: each outer iteration scatters the ghost values of the iterate,
   folds the off-process coupling into the right-hand side (bb1 = bb - B*x),
   and then runs the sequential SOR on the diagonal block. */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL; /* work vector holding bb - B*x */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless a single sweep with zero initial guess suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero initial guess: the first sweep needs no ghost update of xx */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* bring over the ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2247 
2248 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2249 {
2250   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2251   PetscInt     m, N, i, *garray = aij->garray;
2252   PetscInt     ib, jb, bs = A->rmap->bs;
2253   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2254   MatScalar   *a_val = a_aij->a;
2255   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2256   MatScalar   *b_val = b_aij->a;
2257   PetscReal   *work;
2258 
2259   PetscFunctionBegin;
2260   PetscCall(MatGetSize(A, &m, &N));
2261   PetscCall(PetscCalloc1(N, &work));
2262   if (type == NORM_2) {
2263     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2264       for (jb = 0; jb < bs; jb++) {
2265         for (ib = 0; ib < bs; ib++) {
2266           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2267           a_val++;
2268         }
2269       }
2270     }
2271     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2275           b_val++;
2276         }
2277       }
2278     }
2279   } else if (type == NORM_1) {
2280     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2281       for (jb = 0; jb < bs; jb++) {
2282         for (ib = 0; ib < bs; ib++) {
2283           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2284           a_val++;
2285         }
2286       }
2287     }
2288     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
2291           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2292           b_val++;
2293         }
2294       }
2295     }
2296   } else if (type == NORM_INFINITY) {
2297     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
2300           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2301           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2302           a_val++;
2303         }
2304       }
2305     }
2306     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2307       for (jb = 0; jb < bs; jb++) {
2308         for (ib = 0; ib < bs; ib++) {
2309           int col   = garray[b_aij->j[i]] * bs + jb;
2310           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2311           b_val++;
2312         }
2313       }
2314     }
2315   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2316     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2317       for (jb = 0; jb < bs; jb++) {
2318         for (ib = 0; ib < bs; ib++) {
2319           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2320           a_val++;
2321         }
2322       }
2323     }
2324     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2328           b_val++;
2329         }
2330       }
2331     }
2332   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2333     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2334       for (jb = 0; jb < bs; jb++) {
2335         for (ib = 0; ib < bs; ib++) {
2336           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2337           a_val++;
2338         }
2339       }
2340     }
2341     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2342       for (jb = 0; jb < bs; jb++) {
2343         for (ib = 0; ib < bs; ib++) {
2344           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2345           b_val++;
2346         }
2347       }
2348     }
2349   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2350   if (type == NORM_INFINITY) {
2351     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2352   } else {
2353     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2354   }
2355   PetscCall(PetscFree(work));
2356   if (type == NORM_2) {
2357     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2358   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2359     for (i = 0; i < N; i++) reductions[i] /= m;
2360   }
2361   PetscFunctionReturn(PETSC_SUCCESS);
2362 }
2363 
2364 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2365 {
2366   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2367 
2368   PetscFunctionBegin;
2369   PetscCall(MatInvertBlockDiagonal(a->A, values));
2370   A->factorerrortype             = a->A->factorerrortype;
2371   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2372   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2377 {
2378   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2379   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2380 
2381   PetscFunctionBegin;
2382   if (!Y->preallocated) {
2383     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2384   } else if (!aij->nz) {
2385     PetscInt nonew = aij->nonew;
2386     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2387     aij->nonew = nonew;
2388   }
2389   PetscCall(MatShift_Basic(Y, a));
2390   PetscFunctionReturn(PETSC_SUCCESS);
2391 }
2392 
2393 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2394 {
2395   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2396 
2397   PetscFunctionBegin;
2398   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2399   PetscCall(MatMissingDiagonal(a->A, missing, d));
2400   if (d) {
2401     PetscInt rstart;
2402     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2403     *d += rstart / A->rmap->bs;
2404   }
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2409 {
2410   PetscFunctionBegin;
2411   *a = ((Mat_MPIBAIJ *)A->data)->A;
2412   PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414 
2415 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2416 {
2417   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2418 
2419   PetscFunctionBegin;
2420   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2421   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2422   PetscFunctionReturn(PETSC_SUCCESS);
2423 }
2424 
/* Operation dispatch table for MATMPIBAIJ. Entries are POSITIONAL: each slot
   corresponds to a fixed index in struct _MatOps (the /\*nn*\/ markers track the
   slot number), so entries must never be reordered. NULL means the operation is
   unimplemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
/* Preallocate and fill a MATMPIBAIJ matrix from local block-CSR arrays (ii,jj,V).
   ii/jj are in BLOCK indices; V (optional) holds bs*bs values per block. The rows
   are counted into diagonal/off-diagonal block counts, preallocation is done, and
   then the values are inserted with MatSetValuesBlocked. */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  /* Fix the block size on both layouts before setting them up, then re-read it
     (PetscLayoutSetUp may have resolved a negative/unset bs) */
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all of the following are in units of blocks, not scalar rows/columns */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* First pass: count, per block row, how many blocks fall inside [cstart,cend)
     (diagonal part) versus outside (off-diagonal part) */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Second pass: insert values. If V was not given, use a zero scratch buffer
     large enough for the widest row so structure can still be set. */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* All insertions above are local, so skip the off-process stash communication
     during assembly; restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* The CSR pattern is exact: error on any future insertion outside it */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2650 
2651 /*@C
2652   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654   Collective
2655 
2656   Input Parameters:
2657 + B  - the matrix
2658 . bs - the block size
2659 . i  - the indices into `j` for the start of each local row (starts with zero)
2660 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2661 - v  - optional values in the matrix
2662 
2663   Level: advanced
2664 
2665   Notes:
2666   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2667   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2668   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2669   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2670   block column and the second index is over columns within a block.
2671 
2672   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2673 
2674 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2675 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Validate arguments, then dispatch to the type-specific implementation if the
     matrix type registered one (no-op otherwise, per PetscTryMethod semantics) */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2685 
/* Type-specific preallocation for MATMPIBAIJ: sets up layouts and block-size
   bookkeeping, then (re)creates and preallocates the sequential diagonal (A)
   and off-diagonal (B) blocks. d_nz/d_nnz and o_nz/o_nnz give per-block-row
   counts for the diagonal and off-diagonal parts respectively. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* If the matrix was in hash-table (unassembled) mode, restore the real op table */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* re-read bs: layout setup may have resolved a previously unset block size */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* cache block-unit sizes and ownership ranges */
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* discard any previous assembly state (column map, ghost info, scatter) */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Recreate the off-diagonal block; it is empty (0 columns) on a single process */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* Recreate the (square, local) diagonal block */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetType(b->A, MATSEQBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2754 
2755 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2756 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2757 
/* Convert a MATMPIBAIJ matrix to MATMPIADJ (block adjacency graph): one vertex
   per block row, edges to every block column EXCEPT the diagonal entry, with the
   local diagonal and off-diagonal parts merged into globally sorted order.
   Ownership of the ii/jj arrays passes to the created MPIAdj matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* Row pointer: total block entries per row minus one if the row has a diagonal entry */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* Column indices in three globally sorted pieces per row:
     off-diagonal columns left of the owned range, then the (off-diagonal-free)
     diagonal part shifted to global numbering, then the remaining off-diagonal
     columns. Relies on garray being sorted. */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal entry */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2795 
2796 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2797 
2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2799 
/* Convert a MATMPIBAIJ matrix to MATMPIAIJ by converting the sequential diagonal
   and off-diagonal blocks and renumbering the off-diagonal columns to global
   indices. Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* Fresh target with the same sizes and block sizes; empty preallocation
       since the data is copied block-wise below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential blocks in place */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      /* split the scalar column into (block column, offset within block),
         map the block column through garray, and recombine */
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's; B itself is destroyed by the header swap */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2855 
2856 /*MC
2857    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2858 
2859    Options Database Keys:
2860 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2861 . -mat_block_size <bs> - set the blocksize used to store the matrix
2862 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2863 - -mat_use_hash_table <fact> - set hash table factor
2864 
2865    Level: beginner
2866 
2867    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2870 
2871 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2872 M*/
2873 
2874 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2875 
2876 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2877 {
2878   Mat_MPIBAIJ *b;
2879   PetscBool    flg = PETSC_FALSE;
2880 
2881   PetscFunctionBegin;
2882   PetscCall(PetscNew(&b));
2883   B->data      = (void *)b;
2884   B->ops[0]    = MatOps_Values;
2885   B->assembled = PETSC_FALSE;
2886 
2887   B->insertmode = NOT_SET_VALUES;
2888   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2889   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2890 
2891   /* build local table of row and column ownerships */
2892   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2893 
2894   /* build cache for off array entries formed */
2895   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2896 
2897   b->donotstash  = PETSC_FALSE;
2898   b->colmap      = NULL;
2899   b->garray      = NULL;
2900   b->roworiented = PETSC_TRUE;
2901 
2902   /* stuff used in block assembly */
2903   b->barray = NULL;
2904 
2905   /* stuff used for matrix vector multiply */
2906   b->lvec  = NULL;
2907   b->Mvctx = NULL;
2908 
2909   /* stuff for MatGetRow() */
2910   b->rowindices   = NULL;
2911   b->rowvalues    = NULL;
2912   b->getrowactive = PETSC_FALSE;
2913 
2914   /* hash table stuff */
2915   b->ht           = NULL;
2916   b->hd           = NULL;
2917   b->ht_size      = 0;
2918   b->ht_flag      = PETSC_FALSE;
2919   b->ht_fact      = 0;
2920   b->ht_total_ct  = 0;
2921   b->ht_insert_ct = 0;
2922 
2923   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2924   b->ijonly = PETSC_FALSE;
2925 
2926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2929 #if defined(PETSC_HAVE_HYPRE)
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2931 #endif
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2939   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2940 
2941   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2942   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2943   if (flg) {
2944     PetscReal fact = 1.39;
2945     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2946     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2947     if (fact <= 1.0) fact = 1.39;
2948     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2949     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2950   }
2951   PetscOptionsEnd();
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2956 /*MC
2957    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2958 
2959    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2960    and `MATMPIBAIJ` otherwise.
2961 
2962    Options Database Keys:
2963 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2964 
2965   Level: beginner
2966 
2967 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2968 M*/
2969 
2970 /*@C
2971   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2972   (block compressed row).
2973 
2974   Collective
2975 
2976   Input Parameters:
2977 + B     - the matrix
2978 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2979           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2980 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2981            submatrix  (same for all local rows)
2982 . d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2984            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2985            set it even if it is zero.
2986 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2987            submatrix (same for all local rows).
2988 - o_nnz - array containing the number of nonzeros in the various block rows of the
2989            off-diagonal portion of the local submatrix (possibly different for
2990            each block row) or `NULL`.
2991 
2992    If the *_nnz parameter is given then the *_nz parameter is ignored
2993 
2994   Options Database Keys:
2995 + -mat_block_size            - size of the blocks to use
2996 - -mat_use_hash_table <fact> - set hash table factor
2997 
2998   Level: intermediate
2999 
3000   Notes:
3001   For good matrix assembly performance
3002   the user should preallocate the matrix storage by setting the parameters
3003   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3004   performance can be increased by more than a factor of 50.
3005 
3006   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3008 
3009   Storage Information:
3010   For a square global matrix we define each processor's diagonal portion
3011   to be its local rows and the corresponding columns (a square submatrix);
3012   each processor's off-diagonal portion encompasses the remainder of the
3013   local matrix (a rectangular submatrix).
3014 
3015   The user can specify preallocated storage for the diagonal part of
3016   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3017   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3018   memory allocation.  Likewise, specify preallocated storage for the
3019   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3020 
3021   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3022   the figure below we depict these three local rows and all columns (0-11).
3023 
3024 .vb
3025            0 1 2 3 4 5 6 7 8 9 10 11
3026           --------------------------
3027    row 3  |o o o d d d o o o o  o  o
3028    row 4  |o o o d d d o o o o  o  o
3029    row 5  |o o o d d d o o o o  o  o
3030           --------------------------
3031 .ve
3032 
3033   Thus, any entries in the d locations are stored in the d (diagonal)
3034   submatrix, and any entries in the o locations are stored in the
3035   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3036   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3037 
3038   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3039   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3040   In general, for PDE problems in which most nonzeros are near the diagonal,
3041   one expects `d_nz` >> `o_nz`.
3042 
3043   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3044   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3045   You can also run with the option `-info` and look for messages with the string
3046   malloc in them to see if additional memory allocation was needed.
3047 
3048 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3049 @*/
3050 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3051 {
3052   PetscFunctionBegin;
3053   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3054   PetscValidType(B, 1);
3055   PetscValidLogicalCollectiveInt(B, bs, 2);
3056   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3057   PetscFunctionReturn(PETSC_SUCCESS);
3058 }
3059 
3060 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3061 /*@C
3062   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3063   (block compressed row).
3064 
3065   Collective
3066 
3067   Input Parameters:
3068 + comm  - MPI communicator
3069 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3070           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3071 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3072            This value should be the same as the local size used in creating the
3073            y vector for the matrix-vector product y = Ax.
3074 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3075            This value should be the same as the local size used in creating the
3076            x vector for the matrix-vector product y = Ax.
3077 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3078 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3079 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3080            submatrix  (same for all local rows)
3081 . d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
3083            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3084            and set it even if it is zero.
3085 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3086            submatrix (same for all local rows).
3087 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3088            off-diagonal portion of the local submatrix (possibly different for
3089            each block row) or NULL.
3090 
3091   Output Parameter:
3092 . A - the matrix
3093 
3094   Options Database Keys:
3095 + -mat_block_size            - size of the blocks to use
3096 - -mat_use_hash_table <fact> - set hash table factor
3097 
3098   Level: intermediate
3099 
3100   Notes:
3101   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3102   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3103   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3104 
3105   For good matrix assembly performance
3106   the user should preallocate the matrix storage by setting the parameters
3107   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3108   performance can be increased by more than a factor of 50.
3109 
3110   If the *_nnz parameter is given then the *_nz parameter is ignored
3111 
  A nonzero block is any block that has 1 or more nonzeros in it
3113 
3114   The user MUST specify either the local or global matrix dimensions
3115   (possibly both).
3116 
3117   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3119 
3120   Storage Information:
3121   For a square global matrix we define each processor's diagonal portion
3122   to be its local rows and the corresponding columns (a square submatrix);
3123   each processor's off-diagonal portion encompasses the remainder of the
3124   local matrix (a rectangular submatrix).
3125 
3126   The user can specify preallocated storage for the diagonal part of
3127   the local submatrix with either d_nz or d_nnz (not both).  Set
3128   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3129   memory allocation.  Likewise, specify preallocated storage for the
3130   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3131 
3132   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3133   the figure below we depict these three local rows and all columns (0-11).
3134 
3135 .vb
3136            0 1 2 3 4 5 6 7 8 9 10 11
3137           --------------------------
3138    row 3  |o o o d d d o o o o  o  o
3139    row 4  |o o o d d d o o o o  o  o
3140    row 5  |o o o d d d o o o o  o  o
3141           --------------------------
3142 .ve
3143 
3144   Thus, any entries in the d locations are stored in the d (diagonal)
3145   submatrix, and any entries in the o locations are stored in the
3146   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3147   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3148 
3149   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3150   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3151   In general, for PDE problems in which most nonzeros are near the diagonal,
3152   one expects `d_nz` >> `o_nz`.
3153 
3154 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3155 @*/
3156 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3157 {
3158   PetscMPIInt size;
3159 
3160   PetscFunctionBegin;
3161   PetscCall(MatCreate(comm, A));
3162   PetscCall(MatSetSizes(*A, m, n, M, N));
3163   PetscCallMPI(MPI_Comm_size(comm, &size));
3164   if (size > 1) {
3165     PetscCall(MatSetType(*A, MATMPIBAIJ));
3166     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3167   } else {
3168     PetscCall(MatSetType(*A, MATSEQBAIJ));
3169     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3170   }
3171   PetscFunctionReturn(PETSC_SUCCESS);
3172 }
3173 
3174 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3175 {
3176   Mat          mat;
3177   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3178   PetscInt     len = 0;
3179 
3180   PetscFunctionBegin;
3181   *newmat = NULL;
3182   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3183   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3184   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3185 
3186   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3187   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3188   if (matin->hash_active) {
3189     PetscCall(MatSetUp(mat));
3190   } else {
3191     mat->factortype   = matin->factortype;
3192     mat->preallocated = PETSC_TRUE;
3193     mat->assembled    = PETSC_TRUE;
3194     mat->insertmode   = NOT_SET_VALUES;
3195 
3196     a             = (Mat_MPIBAIJ *)mat->data;
3197     mat->rmap->bs = matin->rmap->bs;
3198     a->bs2        = oldmat->bs2;
3199     a->mbs        = oldmat->mbs;
3200     a->nbs        = oldmat->nbs;
3201     a->Mbs        = oldmat->Mbs;
3202     a->Nbs        = oldmat->Nbs;
3203 
3204     a->size         = oldmat->size;
3205     a->rank         = oldmat->rank;
3206     a->donotstash   = oldmat->donotstash;
3207     a->roworiented  = oldmat->roworiented;
3208     a->rowindices   = NULL;
3209     a->rowvalues    = NULL;
3210     a->getrowactive = PETSC_FALSE;
3211     a->barray       = NULL;
3212     a->rstartbs     = oldmat->rstartbs;
3213     a->rendbs       = oldmat->rendbs;
3214     a->cstartbs     = oldmat->cstartbs;
3215     a->cendbs       = oldmat->cendbs;
3216 
3217     /* hash table stuff */
3218     a->ht           = NULL;
3219     a->hd           = NULL;
3220     a->ht_size      = 0;
3221     a->ht_flag      = oldmat->ht_flag;
3222     a->ht_fact      = oldmat->ht_fact;
3223     a->ht_total_ct  = 0;
3224     a->ht_insert_ct = 0;
3225 
3226     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3227     if (oldmat->colmap) {
3228 #if defined(PETSC_USE_CTABLE)
3229       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3230 #else
3231       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3232       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3233 #endif
3234     } else a->colmap = NULL;
3235 
3236     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3237       PetscCall(PetscMalloc1(len, &a->garray));
3238       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3239     } else a->garray = NULL;
3240 
3241     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3242     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3243     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3244 
3245     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3246     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3247   }
3248   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3249   *newmat = mat;
3250   PetscFunctionReturn(PETSC_SUCCESS);
3251 }
3252 
3253 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3254 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3255 {
3256   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3257   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3258   PetscScalar *matvals;
3259 
3260   PetscFunctionBegin;
3261   PetscCall(PetscViewerSetUp(viewer));
3262 
3263   /* read in matrix header */
3264   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3265   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3266   M  = header[1];
3267   N  = header[2];
3268   nz = header[3];
3269   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3270   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3271   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3272 
3273   /* set block sizes from the viewer's .info file */
3274   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3275   /* set local sizes if not set already */
3276   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3277   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3278   /* set global sizes if not set already */
3279   if (mat->rmap->N < 0) mat->rmap->N = M;
3280   if (mat->cmap->N < 0) mat->cmap->N = N;
3281   PetscCall(PetscLayoutSetUp(mat->rmap));
3282   PetscCall(PetscLayoutSetUp(mat->cmap));
3283 
3284   /* check if the matrix sizes are correct */
3285   PetscCall(MatGetSize(mat, &rows, &cols));
3286   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3287   PetscCall(MatGetBlockSize(mat, &bs));
3288   PetscCall(MatGetLocalSize(mat, &m, &n));
3289   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3290   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3291   mbs = m / bs;
3292   nbs = n / bs;
3293 
3294   /* read in row lengths and build row indices */
3295   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3296   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3297   rowidxs[0] = 0;
3298   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3299   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3300   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3301 
3302   /* read in column indices and matrix values */
3303   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3304   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3305   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3306 
3307   {                /* preallocate matrix storage */
3308     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3309     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3310     PetscBool  sbaij, done;
3311     PetscInt  *d_nnz, *o_nnz;
3312 
3313     PetscCall(PetscBTCreate(nbs, &bt));
3314     PetscCall(PetscHSetICreate(&ht));
3315     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3316     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3317     for (i = 0; i < mbs; i++) {
3318       PetscCall(PetscBTMemzero(nbs, bt));
3319       PetscCall(PetscHSetIClear(ht));
3320       for (k = 0; k < bs; k++) {
3321         PetscInt row = bs * i + k;
3322         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3323           PetscInt col = colidxs[j];
3324           if (!sbaij || col >= row) {
3325             if (col >= cs && col < ce) {
3326               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3327             } else {
3328               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3329               if (done) o_nnz[i]++;
3330             }
3331           }
3332         }
3333       }
3334     }
3335     PetscCall(PetscBTDestroy(&bt));
3336     PetscCall(PetscHSetIDestroy(&ht));
3337     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3338     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3339     PetscCall(PetscFree2(d_nnz, o_nnz));
3340   }
3341 
3342   /* store matrix values */
3343   for (i = 0; i < m; i++) {
3344     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3345     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3346   }
3347 
3348   PetscCall(PetscFree(rowidxs));
3349   PetscCall(PetscFree2(colidxs, matvals));
3350   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3351   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3352   PetscFunctionReturn(PETSC_SUCCESS);
3353 }
3354 
3355 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3356 {
3357   PetscBool isbinary;
3358 
3359   PetscFunctionBegin;
3360   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3361   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3362   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3363   PetscFunctionReturn(PETSC_SUCCESS);
3364 }
3365 
3366 /*@
3367   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3368 
3369   Input Parameters:
3370 + mat  - the matrix
3371 - fact - factor
3372 
3373   Options Database Key:
3374 . -mat_use_hash_table <fact> - provide the factor
3375 
3376   Level: advanced
3377 
3378 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3379 @*/
3380 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3381 {
3382   PetscFunctionBegin;
3383   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3384   PetscFunctionReturn(PETSC_SUCCESS);
3385 }
3386 
3387 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3388 {
3389   Mat_MPIBAIJ *baij;
3390 
3391   PetscFunctionBegin;
3392   baij          = (Mat_MPIBAIJ *)mat->data;
3393   baij->ht_fact = fact;
3394   PetscFunctionReturn(PETSC_SUCCESS);
3395 }
3396 
3397 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3398 {
3399   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3400   PetscBool    flg;
3401 
3402   PetscFunctionBegin;
3403   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3404   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3405   if (Ad) *Ad = a->A;
3406   if (Ao) *Ao = a->B;
3407   if (colmap) *colmap = a->garray;
3408   PetscFunctionReturn(PETSC_SUCCESS);
3409 }
3410 
3411 /*
3412     Special version for direct calls from Fortran (to eliminate two function call overheads
3413 */
3414 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3415   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3416 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3417   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3418 #endif
3419 
3420 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3421 /*@C
3422   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3423 
3424   Collective
3425 
3426   Input Parameters:
3427 + matin  - the matrix
3428 . min    - number of input rows
3429 . im     - input rows
3430 . nin    - number of input columns
3431 . in     - input columns
3432 . v      - numerical values input
3433 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3434 
3435   Level: advanced
3436 
3437   Developer Notes:
3438   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3439 
3440 .seealso: `Mat`, `MatSetValuesBlocked()`
3441 @*/
3442 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3443 {
3444   /* convert input arguments to C version */
3445   Mat        mat = *matin;
3446   PetscInt   m = *min, n = *nin;
3447   InsertMode addv = *addvin;
3448 
3449   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3450   const MatScalar *value;
3451   MatScalar       *barray      = baij->barray;
3452   PetscBool        roworiented = baij->roworiented;
3453   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3454   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3455   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3456 
3457   PetscFunctionBegin;
3458   /* tasks normally handled by MatSetValuesBlocked() */
3459   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3460   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3461   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3462   if (mat->assembled) {
3463     mat->was_assembled = PETSC_TRUE;
3464     mat->assembled     = PETSC_FALSE;
3465   }
3466   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3467 
3468   if (!barray) {
3469     PetscCall(PetscMalloc1(bs2, &barray));
3470     baij->barray = barray;
3471   }
3472 
3473   if (roworiented) stepval = (n - 1) * bs;
3474   else stepval = (m - 1) * bs;
3475 
3476   for (i = 0; i < m; i++) {
3477     if (im[i] < 0) continue;
3478     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3479     if (im[i] >= rstart && im[i] < rend) {
3480       row = im[i] - rstart;
3481       for (j = 0; j < n; j++) {
3482         /* If NumCol = 1 then a copy is not required */
3483         if ((roworiented) && (n == 1)) {
3484           barray = (MatScalar *)v + i * bs2;
3485         } else if ((!roworiented) && (m == 1)) {
3486           barray = (MatScalar *)v + j * bs2;
3487         } else { /* Here a copy is required */
3488           if (roworiented) {
3489             value = v + i * (stepval + bs) * bs + j * bs;
3490           } else {
3491             value = v + j * (stepval + bs) * bs + i * bs;
3492           }
3493           for (ii = 0; ii < bs; ii++, value += stepval) {
3494             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3495           }
3496           barray -= bs2;
3497         }
3498 
3499         if (in[j] >= cstart && in[j] < cend) {
3500           col = in[j] - cstart;
3501           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3502         } else if (in[j] < 0) {
3503           continue;
3504         } else {
3505           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3506           if (mat->was_assembled) {
3507             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3508 
3509 #if defined(PETSC_USE_DEBUG)
3510   #if defined(PETSC_USE_CTABLE)
3511             {
3512               PetscInt data;
3513               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3514               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3515             }
3516   #else
3517             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3518   #endif
3519 #endif
3520 #if defined(PETSC_USE_CTABLE)
3521             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3522             col = (col - 1) / bs;
3523 #else
3524             col = (baij->colmap[in[j]] - 1) / bs;
3525 #endif
3526             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
3527               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3528               col = in[j];
3529             }
3530           } else col = in[j];
3531           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3532         }
3533       }
3534     } else {
3535       if (!baij->donotstash) {
3536         if (roworiented) {
3537           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3538         } else {
3539           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3540         }
3541       }
3542     }
3543   }
3544 
3545   /* task normally handled by MatSetValuesBlocked() */
3546   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3547   PetscFunctionReturn(PETSC_SUCCESS);
3548 }
3549 
3550 /*@
3551   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3552   CSR format the local rows.
3553 
3554   Collective
3555 
3556   Input Parameters:
3557 + comm - MPI communicator
3558 . bs   - the block size, only a block size of 1 is supported
3559 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3560 . n    - This value should be the same as the local size used in creating the
3561        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3562        calculated if N is given) For square matrices n is almost always m.
3563 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3564 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3565 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3566 . j    - column indices
3567 - a    - matrix values
3568 
3569   Output Parameter:
3570 . mat - the matrix
3571 
3572   Level: intermediate
3573 
3574   Notes:
3575   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3576   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3577   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3578 
3579   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3580   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3581   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3582   with column-major ordering within blocks.
3583 
3584   The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3585 
3586 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3587           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3588 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* validate the input: CSR row offsets must start at 0, and the local row count must be
     explicit (PETSC_DECIDE is negative, so it is rejected by the m >= 0 check as well) */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  /* the block size must be known before MatSetUp() so the layouts are built with it */
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* the values in a[] are stored column-major within each bs x bs block (see the Notes in
     the manual page above), so switch to column-oriented insertion while copying them in,
     then restore the default row-oriented mode */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3604 
3605 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3606 {
3607   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3608   PetscInt    *indx;
3609   PetscScalar *values;
3610 
3611   PetscFunctionBegin;
3612   PetscCall(MatGetSize(inmat, &m, &N));
3613   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3614     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3615     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3616     PetscInt    *bindx, rmax = a->rmax, j;
3617     PetscMPIInt  rank, size;
3618 
3619     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3620     mbs = m / bs;
3621     Nbs = N / cbs;
3622     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3623     nbs = n / cbs;
3624 
3625     PetscCall(PetscMalloc1(rmax, &bindx));
3626     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3627 
3628     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3629     PetscCallMPI(MPI_Comm_rank(comm, &size));
3630     if (rank == size - 1) {
3631       /* Check sum(nbs) = Nbs */
3632       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3633     }
3634 
3635     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3636     for (i = 0; i < mbs; i++) {
3637       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3638       nnz = nnz / bs;
3639       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3640       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3641       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3642     }
3643     PetscCall(PetscFree(bindx));
3644 
3645     PetscCall(MatCreate(comm, outmat));
3646     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3647     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3648     PetscCall(MatSetType(*outmat, MATBAIJ));
3649     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3650     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3651     MatPreallocateEnd(dnz, onz);
3652     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3653   }
3654 
3655   /* numeric phase */
3656   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3657   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3658 
3659   for (i = 0; i < m; i++) {
3660     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3661     Ii = i + rstart;
3662     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3663     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3664   }
3665   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3666   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3667   PetscFunctionReturn(PETSC_SUCCESS);
3668 }
3669