xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f409f0aa4ada26a4a5b8f25b91c71f4af3eee8e5)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
/*
  Destructor for MATMPIBAIJ: frees the local diagonal (A) and off-diagonal (B)
  sequential blocks, the global-to-local column map, the scatter context and
  all work arrays, then detaches the type name and every method composed on
  the matrix for this type.
*/
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* record final dimensions for object-state logging before tearing down */
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));  /* stash for point-indexed off-process entries */
  PetscCall(MatStashDestroy_Private(&mat->bstash)); /* stash for block-indexed off-process entries */
  PetscCall(MatDestroy(&baij->A)); /* local diagonal block */
  PetscCall(MatDestroy(&baij->B)); /* local off-diagonal block */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray)); /* global block-column indices of B's columns */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));        /* MatSetValuesBlocked() staging block */
  PetscCall(PetscFree2(baij->hd, baij->ht)); /* hash-table assembly arrays */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* remove the composed methods installed when the type was set */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
/*
  MatGetRowMaxAbs for MPIBAIJ: for each locally owned row the entry of
  maximum absolute value is computed separately for the diagonal block (a->A)
  and the off-diagonal block (a->B); the larger of the two wins.  When idx is
  non-NULL it receives the global (point) column index of the winning entry;
  on an exact tie in magnitude the smaller global column index is kept.
*/
static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  /* per-row max |entry| of the diagonal block; indices are local to a->A */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* shift the diagonal-block indices to global column numbering; rows whose
       max is exactly zero are left untouched */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* per-row max |entry| of the off-diagonal block; idxb is local to a->B */
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      /* translate B's local column to global: garray[] maps B's block
         columns to global block columns */
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      /* tie in magnitude: prefer the smaller global column index */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  /* nbs = number of block columns actually stored in the off-diagonal part B */
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-table colmap: key = global block column + 1, value = B-local point
     column offset + 1; both are shifted by one so that 0 can mean "absent".
     NOTE(review): the size hint passed is baij->nbs while the loop inserts
     nbs = B->nbs entries -- presumably only a capacity hint, but confirm the
     two counts are comparable. */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense colmap: one slot per global block column (non-scalable, see the
     comment above); calloc'ed so that 0 means "block column not in B" */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
159 
/*
  Insert or add one scalar at local point position (row,col) of the diagonal
  block A.  The block row is searched (binary search narrowed to a window of
  at most 3, then linear scan) for block column bcol; if absent and a->nonew
  permits, a new zeroed bs x bs block is inserted and later entries shifted
  up.  Within a block, storage is column-major, hence the bs*cidx + ridx
  addressing.  Relies on locals declared by the caller (rp, ap, bap, low,
  high, t, _i, N, brow, bcol, ridx, cidx, nrow, rmax, bs, bs2, a, aimax, ai,
  aj, ailen, aa, A); (orow,ocol) are global indices used only in errors.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row, and offsets of its indices/values */ \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; /* allocated blocks in this block row */ \
    nrow = ailen[brow]; /* used blocks in this block row */ \
    bcol = col / bs; \
    ridx = row % bs; /* position within the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* nonew==1: drop new locations silently */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
199 
/*
  Same insertion logic as MatSetValues_SeqBAIJ_A_Private() but operating on
  the off-diagonal block B (b, bimax, bi, bj, bilen, ba) and using its own
  label so both macros can be expanded in the same function body.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row, and offsets of its indices/values */ \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; /* allocated blocks in this block row */ \
    nrow = bilen[brow]; /* used blocks in this block row */ \
    bcol = col / bs; \
    ridx = row % bs; /* position within the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* nonew==1: drop new locations silently */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
239 
/*
  MatSetValues for MPIBAIJ using point (scalar) indices.  Entries in locally
  owned rows are routed to the diagonal block (locally owned columns) or the
  off-diagonal block (other columns, translated through colmap once the
  matrix has been assembled); entries for rows owned by other processes are
  queued in mat->stash and communicated at assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  /* scratch variables used by MatSetValues_SeqBAIJ_{A,B}_Private() */
  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal (A) part */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative columns are ignored */
          continue;
        } else { /* off-diagonal (B) part */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column to B's compacted local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores offset+1; 0 means "absent" -> col = -1 */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* new off-process block column: revert B to its unassembled
                 (global-column) form so the entry can be inserted */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
/*
  Insert or add one bs x bs block of values at block position (row,col) of a
  sequential BAIJ matrix (used by the MPI layer for both the A and B parts).
  v holds bs*bs values, row-major when roworiented and column-major
  otherwise; block storage itself is laid out so that elements of one input
  row are written with stride bs (see the jj += bs loops), hence the
  transposing copy in the row-oriented case.  (orow,ocol) are the caller's
  global block indices, used only for error reporting.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* first stored value of this block row */
  rmax  = imax[row];          /* allocated blocks in this block row */
  nrow  = ailen[row];         /* used blocks in this block row */
  value = v;                  /* (redundant: value was already initialized to v above) */
  low   = 0;
  high  = nrow;
  /* binary search down to a window of at most 7 blocks, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: update its bs*bs values */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* nonew==1: drop new block locations silently */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  /* write the new block (no pre-zeroing needed: every entry is assigned) */
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
402 
403 /*
404     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
406 */
/*
  MatSetValuesBlocked for MPIBAIJ: im[]/in[] are block indices and v holds
  m*n blocks of bs*bs values each.  Each block destined for this process is
  gathered (when necessary) into the contiguous staging block baij->barray
  and passed to MatSetValuesBlocked_SeqBAIJ_Inlined() for the A or B part;
  off-process block rows are queued in mat->bstash for assembly time.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs staging block, kept for the matrix lifetime */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* scalar distance in v between consecutive rows of one block beyond the
     bs contiguous entries (row-oriented) resp. consecutive columns
     (column-oriented) */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staging block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal (A) part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block columns are ignored */
          continue;
        } else { /* off-diagonal (B) part */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate global block column to B's compacted block column */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs; /* stored value is point offset+1 */
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* new off-process block column: revert B to its unassembled
                 (global-column) form so the block can be inserted */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 
/* Multiplicative hashing for the assembly hash table: HASH_KEY is the
   golden-ratio conjugate, and HASH() maps key into [0,size) via the
   fractional part of key*HASH_KEY; tmp is a caller-supplied PetscReal
   scratch variable. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
495   PetscBool    roworiented = baij->roworiented;
496   PetscInt     i, j, row, col;
497   PetscInt     rstart_orig = mat->rmap->rstart;
498   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500   PetscReal    tmp;
501   MatScalar  **HD       = baij->hd, value;
502   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503 
504   PetscFunctionBegin;
505   for (i = 0; i < m; i++) {
506     if (PetscDefined(USE_DEBUG)) {
507       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509     }
510     row = im[i];
511     if (row >= rstart_orig && row < rend_orig) {
512       for (j = 0; j < n; j++) {
513         col = in[j];
514         if (roworiented) value = v[i * n + j];
515         else value = v[i + j * m];
516         /* Look up PetscInto the Hash Table */
517         key = (row / bs) * Nbs + (col / bs) + 1;
518         h1  = HASH(size, key, tmp);
519 
520         idx = h1;
521         if (PetscDefined(USE_DEBUG)) {
522           insert_ct++;
523           total_ct++;
524           if (HT[idx] != key) {
525             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526             if (idx == size) {
527               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529             }
530           }
531         } else if (HT[idx] != key) {
532           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533           if (idx == size) {
534             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
/*
  MatGetValues for MPIBAIJ: only locally owned rows may be queried (an error
  is raised otherwise).  Locally owned columns are served from the diagonal
  block; all other columns are translated through colmap/garray to the
  off-diagonal block, and positions outside B's stored block structure
  return 0.0.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal (A) part */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* off-diagonal (B) part */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores offset+1; 0 means "absent" -> data = -1 */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* data is B's local point-column offset of the block, or < 0 when
             the block column is not stored; garray cross-check guards the
             dense-colmap case */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs; /* add the offset within the block */
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
689 
/* Compute a matrix norm of the distributed BAIJ matrix by combining the locally
   stored diagonal (baij->A) and off-diagonal (baij->B) sequential blocks and
   reducing across all ranks of the matrix communicator. */
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: the diagonal block holds the entire matrix, delegate directly */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar of A and B, reduce, then sqrt */
      v  = amat->a;
      nz = amat->nz * bs2; /* nz counts bs x bs blocks; scale by scalars per block */
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* accumulate per-global-column absolute sums in tmp[], sum across ranks,
         then take the maximum over all global columns */
      PetscReal *tmp;
      /* NOTE(review): uses rstartbs as the first owned block column; this assumes the
         row and column block layouts coincide — confirm for rectangular layouts */
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          /* v advances down one block column at a time (blocks stored column-major) */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      /* off-diagonal block: garray maps local block columns of B to global block columns */
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* sums[] holds running absolute row sums for the bs scalar rows of one block row */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        /* diagonal-block contribution to block row j */
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        /* off-diagonal-block contribution to the same block row */
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        /* keep the largest local row sum seen so far */
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* global maximum over all ranks */
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
788 
789 /*
790   Creates the hash table, and sets the table
791   This table is created only once.
792   If new entries need to be added to the matrix
793   then the hash table has to be destroyed and
794   recreated.
795 */
796 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
797 {
798   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
799   Mat          A = baij->A, B = baij->B;
800   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
801   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
802   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
803   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
804   PetscInt    *HT, key;
805   MatScalar  **HD;
806   PetscReal    tmp;
807 #if defined(PETSC_USE_INFO)
808   PetscInt ct = 0, max = 0;
809 #endif
810 
811   PetscFunctionBegin;
812   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
813 
814   baij->ht_size = (PetscInt)(factor * nz);
815   ht_size       = baij->ht_size;
816 
817   /* Allocate Memory for Hash Table */
818   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
819   HD = baij->hd;
820   HT = baij->ht;
821 
822   /* Loop Over A */
823   for (i = 0; i < a->mbs; i++) {
824     for (j = ai[i]; j < ai[i + 1]; j++) {
825       row = i + rstart;
826       col = aj[j] + cstart;
827 
828       key = row * Nbs + col + 1;
829       h1  = HASH(ht_size, key, tmp);
830       for (k = 0; k < ht_size; k++) {
831         if (!HT[(h1 + k) % ht_size]) {
832           HT[(h1 + k) % ht_size] = key;
833           HD[(h1 + k) % ht_size] = a->a + j * bs2;
834           break;
835 #if defined(PETSC_USE_INFO)
836         } else {
837           ct++;
838 #endif
839         }
840       }
841 #if defined(PETSC_USE_INFO)
842       if (k > max) max = k;
843 #endif
844     }
845   }
846   /* Loop Over B */
847   for (i = 0; i < b->mbs; i++) {
848     for (j = bi[i]; j < bi[i + 1]; j++) {
849       row = i + rstart;
850       col = garray[bj[j]];
851       key = row * Nbs + col + 1;
852       h1  = HASH(ht_size, key, tmp);
853       for (k = 0; k < ht_size; k++) {
854         if (!HT[(h1 + k) % ht_size]) {
855           HT[(h1 + k) % ht_size] = key;
856           HD[(h1 + k) % ht_size] = b->a + j * bs2;
857           break;
858 #if defined(PETSC_USE_INFO)
859         } else {
860           ct++;
861 #endif
862         }
863       }
864 #if defined(PETSC_USE_INFO)
865       if (k > max) max = k;
866 #endif
867     }
868   }
869 
870   /* Print Summary */
871 #if defined(PETSC_USE_INFO)
872   for (i = 0, j = 0; i < ht_size; i++) {
873     if (HT[i]) j++;
874   }
875   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
876 #endif
877   PetscFunctionReturn(PETSC_SUCCESS);
878 }
879 
880 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
881 {
882   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
883   PetscInt     nstash, reallocs;
884 
885   PetscFunctionBegin;
886   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
887 
888   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
889   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
890   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
891   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
892   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
893   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
894   PetscFunctionReturn(PETSC_SUCCESS);
895 }
896 
/* Finish assembly: drain the stashes started in MatAssemblyBegin, insert the
   received off-process values, assemble both local blocks, negotiate
   disassembly across ranks, and (optionally) switch to hash-table insertion. */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message carries parallel row/col/val arrays */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 scalars, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the caller's row/column orientation flags */
    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* MPI_LAND over was_assembled: false on any rank means someone disassembled */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly: build the scatter used by matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on request, build the hash table and switch to the hash-table insertion paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1003 
1004 #include <petscdraw.h>
/* View the matrix on an ASCII, draw, or socket viewer. Info formats print
   per-rank statistics only; all other formats gather the entire matrix onto
   rank 0 and view it there (memory-intensive for large matrices). */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no matrix entries are printed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* rank 0 owns every row of the gathered copy; the other ranks own none */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals lists the bs consecutive global scalar rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one scalar column at a time; a walks the block's values */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray translates B's local block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    /* matname is only read under the same name-present guard it is set under */
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1125 
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Writes the matrix to a binary viewer in PETSc's standard sparse format:
   a 4-entry header (classid, M, N, global nnz), then per-row lengths, then
   column indices, then values — with each row's columns emitted in ascending
   global order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzeros: nz fields count blocks */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  /* only rank 0 writes the header, so only it needs the global count */
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths: each of the bs scalar rows in a block row
     has the same number of nonzeros */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices in ascending order per scalar row:
     B-columns left of the diagonal block, then A-columns, then the
     remaining B-columns to the right */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break; /* reached columns right of the diagonal block */
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, walked in the same three-segment order;
     a->a[bs*(bs*j + l) + k] addresses scalar (row k, col l) of block j */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1205 
1206 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1207 {
1208   PetscBool iascii, isdraw, issocket, isbinary;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1212   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1213   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1214   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1215   if (iascii || isdraw || issocket) {
1216     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1217   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1218   PetscFunctionReturn(PETSC_SUCCESS);
1219 }
1220 
1221 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1222 {
1223   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1224   PetscInt     nt;
1225 
1226   PetscFunctionBegin;
1227   PetscCall(VecGetLocalSize(xx, &nt));
1228   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1229   PetscCall(VecGetLocalSize(yy, &nt));
1230   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1231   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1232   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1233   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1234   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1235   PetscFunctionReturn(PETSC_SUCCESS);
1236 }
1237 
1238 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1239 {
1240   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1241 
1242   PetscFunctionBegin;
1243   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1244   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1245   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1246   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1247   PetscFunctionReturn(PETSC_SUCCESS);
1248 }
1249 
1250 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1251 {
1252   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1253 
1254   PetscFunctionBegin;
1255   /* do nondiagonal part */
1256   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1257   /* do local part */
1258   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1259   /* add partial results together */
1260   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1261   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1262   PetscFunctionReturn(PETSC_SUCCESS);
1263 }
1264 
1265 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1266 {
1267   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1268 
1269   PetscFunctionBegin;
1270   /* do nondiagonal part */
1271   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1272   /* do local part */
1273   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1274   /* add partial results together */
1275   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1276   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1277   PetscFunctionReturn(PETSC_SUCCESS);
1278 }
1279 
1280 /*
1281   This only works correctly for square matrices where the subblock A->A is the
1282    diagonal block
1283 */
1284 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1285 {
1286   PetscFunctionBegin;
1287   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1288   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1289   PetscFunctionReturn(PETSC_SUCCESS);
1290 }
1291 
1292 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1293 {
1294   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1295 
1296   PetscFunctionBegin;
1297   PetscCall(MatScale(a->A, aa));
1298   PetscCall(MatScale(a->B, aa));
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301 
/* Extract one (locally owned) scalar row of the distributed matrix, merging
   the entries of the diagonal block A and off-diagonal block B so that the
   returned columns appear in increasing global order. The returned arrays
   point into per-matrix scratch space; callers must pair this with
   MatRestoreRow. */
static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  /* only one row may be "checked out" at a time */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* combined block count of block row i in A and B */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local-to-global block column map for B */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the
         diagonal block; those go first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* no values pass ran; find imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  /* hand the per-block work arrays back to A and B */
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1384 
1385 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1386 {
1387   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1388 
1389   PetscFunctionBegin;
1390   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1391   baij->getrowactive = PETSC_FALSE;
1392   PetscFunctionReturn(PETSC_SUCCESS);
1393 }
1394 
1395 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1396 {
1397   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1398 
1399   PetscFunctionBegin;
1400   PetscCall(MatZeroEntries(l->A));
1401   PetscCall(MatZeroEntries(l->B));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
1404 
1405 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1406 {
1407   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1408   Mat            A = a->A, B = a->B;
1409   PetscLogDouble isend[5], irecv[5];
1410 
1411   PetscFunctionBegin;
1412   info->block_size = (PetscReal)matin->rmap->bs;
1413 
1414   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1415 
1416   isend[0] = info->nz_used;
1417   isend[1] = info->nz_allocated;
1418   isend[2] = info->nz_unneeded;
1419   isend[3] = info->memory;
1420   isend[4] = info->mallocs;
1421 
1422   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1423 
1424   isend[0] += info->nz_used;
1425   isend[1] += info->nz_allocated;
1426   isend[2] += info->nz_unneeded;
1427   isend[3] += info->memory;
1428   isend[4] += info->mallocs;
1429 
1430   if (flag == MAT_LOCAL) {
1431     info->nz_used      = isend[0];
1432     info->nz_allocated = isend[1];
1433     info->nz_unneeded  = isend[2];
1434     info->memory       = isend[3];
1435     info->mallocs      = isend[4];
1436   } else if (flag == MAT_GLOBAL_MAX) {
1437     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1438 
1439     info->nz_used      = irecv[0];
1440     info->nz_allocated = irecv[1];
1441     info->nz_unneeded  = irecv[2];
1442     info->memory       = irecv[3];
1443     info->mallocs      = irecv[4];
1444   } else if (flag == MAT_GLOBAL_SUM) {
1445     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1446 
1447     info->nz_used      = irecv[0];
1448     info->nz_allocated = irecv[1];
1449     info->nz_unneeded  = irecv[2];
1450     info->memory       = irecv[3];
1451     info->mallocs      = irecv[4];
1452   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1453   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1454   info->fill_ratio_needed = 0;
1455   info->factor_mallocs    = 0;
1456   PetscFunctionReturn(PETSC_SUCCESS);
1457 }
1458 
/* Set a matrix option, forwarding to the sequential blocks where the option
   applies to them and recording parallel-specific options in the MPI data
   structure. Unhandled options are ignored silently. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that are simply forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally as well, since the stash paths consult it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built lazily at final assembly; 1.39 is the default oversizing factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1502 
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the destination with row/column sizes swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals holds the bs consecutive global row numbers of block row i; in the
     transpose they become the column indices for each inserted column strip */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global column of this block in A; rows of the block are inserted one at a
         time as rows of B (i.e. transposed) */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs; /* advance to the next column of the bs x bs block (column-major storage) */
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* off-diagonal block columns are stored compressed; garray maps them back to global */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX, fold the new matrix back into A's header */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1569 
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  /* s2/s3: local row/column sizes of the parallel matrix, used to validate ll/rr */
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling only needs locally owned rows, so the off-diagonal block can be done now */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1599 
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed: b_i = diag * x_i for each zeroed local row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square layout: the sequential routine can place diag directly on the diagonal */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* rectangular layout: zero first, then insert diagonal entries one by one,
       which may create new nonzero locations (hence the nonew check below) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1656 
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed"; reduction below overwrites */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build lmask: nonzero in the ghost positions that correspond to zeroed columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed below to correct b for eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* count entries in this point row; stride bs skips to the same row of the next block */
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          /* entry (i, col) of the off-diagonal part sits in a masked column */
          aa = baij->a + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1753 
1754 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1755 {
1756   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1757 
1758   PetscFunctionBegin;
1759   PetscCall(MatSetUnfactored(a->A));
1760   PetscFunctionReturn(PETSC_SUCCESS);
1761 }
1762 
1763 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1764 
1765 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1766 {
1767   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1768   Mat          a, b, c, d;
1769   PetscBool    flg;
1770 
1771   PetscFunctionBegin;
1772   a = matA->A;
1773   b = matA->B;
1774   c = matB->A;
1775   d = matB->B;
1776 
1777   PetscCall(MatEqual(a, c, &flg));
1778   if (flg) PetscCall(MatEqual(b, d, &flg));
1779   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1780   PetscFunctionReturn(PETSC_SUCCESS);
1781 }
1782 
1783 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1784 {
1785   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1786   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     PetscCall(MatCopy_Basic(A, B, str));
1792   } else {
1793     PetscCall(MatCopy(a->A, b->A, str));
1794     PetscCall(MatCopy(a->B, b->B, str));
1795   }
1796   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1797   PetscFunctionReturn(PETSC_SUCCESS);
1798 }
1799 
1800 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1801 {
1802   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1803   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1804   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1805 
1806   PetscFunctionBegin;
1807   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1808   PetscFunctionReturn(PETSC_SUCCESS);
1809 }
1810 
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: the stored value arrays line up entry for entry, so a
       single BLAS axpy per block (diagonal, off-diagonal) does Y += a*X */
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: build a new matrix B preallocated for the union pattern,
       compute B = Y + a*X into it, then replace Y's innards with B's */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1853 
1854 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1855 {
1856   PetscFunctionBegin;
1857   if (PetscDefined(USE_COMPLEX)) {
1858     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1859 
1860     PetscCall(MatConjugate_SeqBAIJ(a->A));
1861     PetscCall(MatConjugate_SeqBAIJ(a->B));
1862   }
1863   PetscFunctionReturn(PETSC_SUCCESS);
1864 }
1865 
1866 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1867 {
1868   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1869 
1870   PetscFunctionBegin;
1871   PetscCall(MatRealPart(a->A));
1872   PetscCall(MatRealPart(a->B));
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatImaginaryPart(a->A));
1882   PetscCall(MatImaginaryPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1887 {
1888   IS       iscol_local;
1889   PetscInt csize;
1890 
1891   PetscFunctionBegin;
1892   PetscCall(ISGetLocalSize(iscol, &csize));
1893   if (call == MAT_REUSE_MATRIX) {
1894     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1895     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1896   } else {
1897     PetscCall(ISAllGather(iscol, &iscol_local));
1898   }
1899   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1900   if (call == MAT_INITIAL_MATRIX) {
1901     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1902     PetscCall(ISDestroy(&iscol_local));
1903   }
1904   PetscFunctionReturn(PETSC_SUCCESS);
1905 }
1906 
1907 /*
1908   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1909   in local and then by concatenating the local matrices the end result.
1910   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1911   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1912 */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point indices to block indices for the extraction routine below */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the previous call is stashed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* work in block rows/columns from here on */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* the extracted values are traversed column-within-block first, so insert column-oriented */
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    /* cwork/vwork point at this block row's column indices and values; advance
       the cursors past them for the next iteration */
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2041 
2042 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2043 {
2044   MPI_Comm        comm, pcomm;
2045   PetscInt        clocal_size, nrows;
2046   const PetscInt *rows;
2047   PetscMPIInt     size;
2048   IS              crowp, lcolp;
2049 
2050   PetscFunctionBegin;
2051   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2052   /* make a collective version of 'rowp' */
2053   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2054   if (pcomm == comm) {
2055     crowp = rowp;
2056   } else {
2057     PetscCall(ISGetSize(rowp, &nrows));
2058     PetscCall(ISGetIndices(rowp, &rows));
2059     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2060     PetscCall(ISRestoreIndices(rowp, &rows));
2061   }
2062   PetscCall(ISSetPermutation(crowp));
2063   /* make a local version of 'colp' */
2064   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2065   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2066   if (size == 1) {
2067     lcolp = colp;
2068   } else {
2069     PetscCall(ISAllGather(colp, &lcolp));
2070   }
2071   PetscCall(ISSetPermutation(lcolp));
2072   /* now we just get the submatrix */
2073   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2074   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2075   /* clean up */
2076   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2077   if (size > 1) PetscCall(ISDestroy(&lcolp));
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2082 {
2083   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2084   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2085 
2086   PetscFunctionBegin;
2087   if (nghosts) *nghosts = B->nbs;
2088   if (ghosts) *ghosts = baij->garray;
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  /* counts are per block row: sum of diagonal (ad) and off-diagonal (bd) block counts */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation holds both recvcounts and displs (displs = second half) */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  /* column indices are written directly into B's j array, sorted per row:
     below-diagonal off-process entries, then diagonal-block entries, then the rest */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  /* recompute counts/displacements in units of nonzeros (not rows) for the j gather */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2171 
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper works on the diagonal block alone; no communication needed */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (= bb - B*x) is only needed when at least one non-zero-initial-guess
     iteration will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the off-process contribution B*x is zero,
         so the first sweep can use bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2239 
/*
  MatGetColumnReductions_MPIBAIJ - Computes a reduction (norm, sum, or mean) of every
  global column of a MATMPIBAIJ matrix.

  Input Parameters:
+ A    - the matrix
- type - NORM_1, NORM_2, NORM_INFINITY, REDUCTION_SUM_REALPART, REDUCTION_MEAN_REALPART,
         REDUCTION_SUM_IMAGINARYPART, or REDUCTION_MEAN_IMAGINARYPART

  Output Parameter:
. reductions - array of length N (global number of columns) receiving the result on every rank

  Notes:
  Each rank accumulates its stored entries into a zero-initialized work array of global
  length N: entries of the diagonal block (aij->A) map to global columns via the column
  ownership offset A->cmap->rstart, entries of the off-diagonal block (aij->B) via the
  garray block-column translation table.  The inner jb/ib loops walk the bs*bs values of
  one block while advancing the raw value pointer (a_val/b_val) in storage order; only jb
  (the block-column offset) enters the destination column index.
  The per-rank partial results are then combined with MPI_Allreduce (MAX for
  NORM_INFINITY, SUM otherwise) and post-processed: square root for NORM_2, division by
  the global row count m for the mean reductions.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work)); /* zeroed so that columns without stored entries contribute 0 */
  if (type == NORM_2) {
    /* accumulate |v|^2 per column (PetscAbsScalar(v*v) == |v|^2 also for complex v) */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    /* accumulate |v| per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    /* take the maximum |v| per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    /* accumulate Re(v) per column; the mean is obtained by dividing by m after the reduce */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* accumulate Im(v) per column; the mean is obtained by dividing by m after the reduce */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank partial results over the matrix communicator */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2355 
2356 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2357 {
2358   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2359 
2360   PetscFunctionBegin;
2361   PetscCall(MatInvertBlockDiagonal(a->A, values));
2362   A->factorerrortype             = a->A->factorerrortype;
2363   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2364   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2365   PetscFunctionReturn(PETSC_SUCCESS);
2366 }
2367 
2368 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2369 {
2370   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2371   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2372 
2373   PetscFunctionBegin;
2374   if (!Y->preallocated) {
2375     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2376   } else if (!aij->nz) {
2377     PetscInt nonew = aij->nonew;
2378     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2379     aij->nonew = nonew;
2380   }
2381   PetscCall(MatShift_Basic(Y, a));
2382   PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384 
2385 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2386 {
2387   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2388 
2389   PetscFunctionBegin;
2390   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2391   PetscCall(MatMissingDiagonal(a->A, missing, d));
2392   if (d) {
2393     PetscInt rstart;
2394     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2395     *d += rstart / A->rmap->bs;
2396   }
2397   PetscFunctionReturn(PETSC_SUCCESS);
2398 }
2399 
2400 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2401 {
2402   PetscFunctionBegin;
2403   *a = ((Mat_MPIBAIJ *)A->data)->A;
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2408 {
2409   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2410 
2411   PetscFunctionBegin;
2412   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2413   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2414   PetscFunctionReturn(PETSC_SUCCESS);
2415 }
2416 
/* Operation table for MATMPIBAIJ.  The initializers are positional and must stay in
   the exact slot order of struct _MatOps (see the numbered comments marking every
   fifth slot); a NULL entry means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};
2574 
2575 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2576 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2577 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates and fills a MATMPIBAIJ matrix from
  local block-CSR data.

  Input Parameters:
+ B  - the matrix
. bs - the block size
. ii - block-row pointers (length m+1 in block rows; ii[0] must be 0)
. jj - global block-column indices for each local block row
- V  - optional bs*bs blocks of values; when NULL, zero blocks are inserted

  Notes:
  A first pass counts, per block row, how many blocks land in the owned (diagonal)
  block-column range [cstart, cend) versus outside it, and preallocates accordingly.
  A second pass inserts the values through MatSetValuesBlocked_MPIBAIJ() and assembles.
  Since CSR input describes only locally owned rows, off-process communication during
  assembly is suppressed via nooffprocentries.
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all quantities below are in blocks, not scalar entries */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row; sizes the zero-block buffer allocated below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* split the row's blocks into diagonal ([cstart,cend)) and off-diagonal counts */
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); /* shared zero blocks reused for every row */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* the CSR input is purely local, so skip the off-process stash exchange during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2646 
2647 /*@C
2648   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2649 
2650   Collective
2651 
2652   Input Parameters:
2653 + B  - the matrix
2654 . bs - the block size
2655 . i  - the indices into `j` for the start of each local row (starts with zero)
2656 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2657 - v  - optional values in the matrix, use `NULL` if not provided
2658 
2659   Level: advanced
2660 
2661   Notes:
2662   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2663   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2664   called this routine.
2665 
2666   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2667   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2668   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2669   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2670   block column and the second index is over columns within a block.
2671 
2672   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2673 
2674 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2675 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation; PetscTryMethod() succeeds silently
     if the matrix type has not composed this method */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2685 
2686 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2687 {
2688   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2689   PetscInt     i;
2690   PetscMPIInt  size;
2691 
2692   PetscFunctionBegin;
2693   if (B->hash_active) {
2694     B->ops[0]      = b->cops;
2695     B->hash_active = PETSC_FALSE;
2696   }
2697   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2698   PetscCall(MatSetBlockSize(B, bs));
2699   PetscCall(PetscLayoutSetUp(B->rmap));
2700   PetscCall(PetscLayoutSetUp(B->cmap));
2701   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2702 
2703   if (d_nnz) {
2704     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2705   }
2706   if (o_nnz) {
2707     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2708   }
2709 
2710   b->bs2 = bs * bs;
2711   b->mbs = B->rmap->n / bs;
2712   b->nbs = B->cmap->n / bs;
2713   b->Mbs = B->rmap->N / bs;
2714   b->Nbs = B->cmap->N / bs;
2715 
2716   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2717   b->rstartbs = B->rmap->rstart / bs;
2718   b->rendbs   = B->rmap->rend / bs;
2719   b->cstartbs = B->cmap->rstart / bs;
2720   b->cendbs   = B->cmap->rend / bs;
2721 
2722 #if defined(PETSC_USE_CTABLE)
2723   PetscCall(PetscHMapIDestroy(&b->colmap));
2724 #else
2725   PetscCall(PetscFree(b->colmap));
2726 #endif
2727   PetscCall(PetscFree(b->garray));
2728   PetscCall(VecDestroy(&b->lvec));
2729   PetscCall(VecScatterDestroy(&b->Mvctx));
2730 
2731   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2732 
2733   MatSeqXAIJGetOptions_Private(b->B);
2734   PetscCall(MatDestroy(&b->B));
2735   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2736   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2737   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2738   MatSeqXAIJRestoreOptions_Private(b->B);
2739 
2740   MatSeqXAIJGetOptions_Private(b->A);
2741   PetscCall(MatDestroy(&b->A));
2742   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2743   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2744   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2745   MatSeqXAIJRestoreOptions_Private(b->A);
2746 
2747   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2748   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2749   B->preallocated  = PETSC_TRUE;
2750   B->was_assembled = PETSC_FALSE;
2751   B->assembled     = PETSC_FALSE;
2752   PetscFunctionReturn(PETSC_SUCCESS);
2753 }
2754 
2755 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2756 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2757 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency structure from the block
  nonzero pattern of a MATMPIBAIJ matrix: one vertex per block row/column, the
  diagonal entry of each row removed, and columns emitted in ascending global order.
  The ii/jj arrays are allocated here and handed to MatCreateMPIAdj() (not freed here).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointers = diag-part + off-diag-part block counts, minus the diagonal entry when present */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* second pass: fill columns in ascending global order:
     off-diagonal columns below the owned range, then owned columns, then the rest */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns shifted to global numbering, skipping the diagonal entry */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns (j retained from the loop above) */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2795 
2796 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2797 
2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2799 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting
  its sequential diagonal and off-diagonal blocks and renumbering the off-diagonal
  column indices from compressed (garray-based) block numbering back to global scalar
  numbering.  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target with matching layout; preallocation is deferred to the block conversions */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential blocks of B in place */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      /* split the scalar column into (block column, offset within block),
         translate the block column through garray, and recombine */
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's, destroying the old BAIJ data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2855 
2856 /*MC
2857    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2858 
2859    Options Database Keys:
2860 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2861 . -mat_block_size <bs> - set the blocksize used to store the matrix
2862 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2863 - -mat_use_hash_table <fact> - set hash table factor
2864 
2865    Level: beginner
2866 
2867    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2870 
2871 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2872 M*/
2873 
2874 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2875 
2876 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2877 {
2878   Mat_MPIBAIJ *b;
2879   PetscBool    flg = PETSC_FALSE;
2880 
2881   PetscFunctionBegin;
2882   PetscCall(PetscNew(&b));
2883   B->data      = (void *)b;
2884   B->ops[0]    = MatOps_Values;
2885   B->assembled = PETSC_FALSE;
2886 
2887   B->insertmode = NOT_SET_VALUES;
2888   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2889   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2890 
2891   /* build local table of row and column ownerships */
2892   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2893 
2894   /* build cache for off array entries formed */
2895   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2896 
2897   b->donotstash  = PETSC_FALSE;
2898   b->colmap      = NULL;
2899   b->garray      = NULL;
2900   b->roworiented = PETSC_TRUE;
2901 
2902   /* stuff used in block assembly */
2903   b->barray = NULL;
2904 
2905   /* stuff used for matrix vector multiply */
2906   b->lvec  = NULL;
2907   b->Mvctx = NULL;
2908 
2909   /* stuff for MatGetRow() */
2910   b->rowindices   = NULL;
2911   b->rowvalues    = NULL;
2912   b->getrowactive = PETSC_FALSE;
2913 
2914   /* hash table stuff */
2915   b->ht           = NULL;
2916   b->hd           = NULL;
2917   b->ht_size      = 0;
2918   b->ht_flag      = PETSC_FALSE;
2919   b->ht_fact      = 0;
2920   b->ht_total_ct  = 0;
2921   b->ht_insert_ct = 0;
2922 
2923   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2924   b->ijonly = PETSC_FALSE;
2925 
2926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2929 #if defined(PETSC_HAVE_HYPRE)
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2931 #endif
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2939   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2940 
2941   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2942   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2943   if (flg) {
2944     PetscReal fact = 1.39;
2945     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2946     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2947     if (fact <= 1.0) fact = 1.39;
2948     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2949     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2950   }
2951   PetscOptionsEnd();
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2956 /*MC
2957    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2958 
2959    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2960    and `MATMPIBAIJ` otherwise.
2961 
2962    Options Database Keys:
2963 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2964 
2965   Level: beginner
2966 
2967 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2968 M*/
2969 
2970 /*@
2971   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2972   (block compressed row).
2973 
2974   Collective
2975 
2976   Input Parameters:
2977 + B     - the matrix
2978 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2979           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2980 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2981            submatrix  (same for all local rows)
2982 . d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2984            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2985            set it even if it is zero.
2986 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2987            submatrix (same for all local rows).
2988 - o_nnz - array containing the number of nonzeros in the various block rows of the
2989            off-diagonal portion of the local submatrix (possibly different for
2990            each block row) or `NULL`.
2991 
2992    If the *_nnz parameter is given then the *_nz parameter is ignored
2993 
2994   Options Database Keys:
2995 + -mat_block_size            - size of the blocks to use
2996 - -mat_use_hash_table <fact> - set hash table factor
2997 
2998   Level: intermediate
2999 
3000   Notes:
3001   For good matrix assembly performance
3002   the user should preallocate the matrix storage by setting the parameters
3003   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3004   performance can be increased by more than a factor of 50.
3005 
3006   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3007   than it must be used on all processors that share the object for that argument.
3008 
3009   Storage Information:
3010   For a square global matrix we define each processor's diagonal portion
3011   to be its local rows and the corresponding columns (a square submatrix);
3012   each processor's off-diagonal portion encompasses the remainder of the
3013   local matrix (a rectangular submatrix).
3014 
3015   The user can specify preallocated storage for the diagonal part of
3016   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3017   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3018   memory allocation.  Likewise, specify preallocated storage for the
3019   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3020 
3021   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3022   the figure below we depict these three local rows and all columns (0-11).
3023 
3024 .vb
3025            0 1 2 3 4 5 6 7 8 9 10 11
3026           --------------------------
3027    row 3  |o o o d d d o o o o  o  o
3028    row 4  |o o o d d d o o o o  o  o
3029    row 5  |o o o d d d o o o o  o  o
3030           --------------------------
3031 .ve
3032 
3033   Thus, any entries in the d locations are stored in the d (diagonal)
3034   submatrix, and any entries in the o locations are stored in the
3035   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3036   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3037 
3038   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3039   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3040   In general, for PDE problems in which most nonzeros are near the diagonal,
3041   one expects `d_nz` >> `o_nz`.
3042 
3043   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3044   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3045   You can also run with the option `-info` and look for messages with the string
3046   malloc in them to see if additional memory allocation was needed.
3047 
3048 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3049 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* the block size must be the same on every process that shares B */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation registered on B; silently a no-op for types
     that do not provide "MatMPIBAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3059 
3060 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3061 /*@
3062   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3063   (block compressed row).
3064 
3065   Collective
3066 
3067   Input Parameters:
3068 + comm  - MPI communicator
3069 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3070           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3071 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3072           This value should be the same as the local size used in creating the
3073           y vector for the matrix-vector product y = Ax.
3074 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3075           This value should be the same as the local size used in creating the
3076           x vector for the matrix-vector product y = Ax.
3077 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3078 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3079 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3080           submatrix  (same for all local rows)
3081 . d_nnz - array containing the number of nonzero blocks in the various block rows
3082           of the in diagonal portion of the local (possibly different for each block
3083           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3084           and set it even if it is zero.
3085 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3086           submatrix (same for all local rows).
3087 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3088           off-diagonal portion of the local submatrix (possibly different for
3089           each block row) or NULL.
3090 
3091   Output Parameter:
3092 . A - the matrix
3093 
3094   Options Database Keys:
3095 + -mat_block_size            - size of the blocks to use
3096 - -mat_use_hash_table <fact> - set hash table factor
3097 
3098   Level: intermediate
3099 
3100   Notes:
3101   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3102   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3103   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3104 
3105   For good matrix assembly performance
3106   the user should preallocate the matrix storage by setting the parameters
3107   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3108   performance can be increased by more than a factor of 50.
3109 
3110   If the *_nnz parameter is given then the *_nz parameter is ignored
3111 
  A nonzero block is any block that has 1 or more nonzeros in it
3113 
3114   The user MUST specify either the local or global matrix dimensions
3115   (possibly both).
3116 
3117   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3119 
3120   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3121   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3122 
3123   Storage Information:
3124   For a square global matrix we define each processor's diagonal portion
3125   to be its local rows and the corresponding columns (a square submatrix);
3126   each processor's off-diagonal portion encompasses the remainder of the
3127   local matrix (a rectangular submatrix).
3128 
3129   The user can specify preallocated storage for the diagonal part of
3130   the local submatrix with either d_nz or d_nnz (not both).  Set
3131   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3132   memory allocation.  Likewise, specify preallocated storage for the
3133   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3134 
3135   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3136   the figure below we depict these three local rows and all columns (0-11).
3137 
3138 .vb
3139            0 1 2 3 4 5 6 7 8 9 10 11
3140           --------------------------
3141    row 3  |o o o d d d o o o o  o  o
3142    row 4  |o o o d d d o o o o  o  o
3143    row 5  |o o o d d d o o o o  o  o
3144           --------------------------
3145 .ve
3146 
3147   Thus, any entries in the d locations are stored in the d (diagonal)
3148   submatrix, and any entries in the o locations are stored in the
3149   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3150   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3151 
3152   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3153   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3154   In general, for PDE problems in which most nonzeros are near the diagonal,
3155   one expects `d_nz` >> `o_nz`.
3156 
3157 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3158           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3159 @*/
3160 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3161 {
3162   PetscMPIInt size;
3163 
3164   PetscFunctionBegin;
3165   PetscCall(MatCreate(comm, A));
3166   PetscCall(MatSetSizes(*A, m, n, M, N));
3167   PetscCallMPI(MPI_Comm_size(comm, &size));
3168   if (size > 1) {
3169     PetscCall(MatSetType(*A, MATMPIBAIJ));
3170     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3171   } else {
3172     PetscCall(MatSetType(*A, MATSEQBAIJ));
3173     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3174   }
3175   PetscFunctionReturn(PETSC_SUCCESS);
3176 }
3177 
/* Duplicate an MPIBAIJ matrix: replicate the layout and per-process bookkeeping of
   matin into a new matrix; cpvalues controls whether the numerical values of the
   diagonal (A) and off-diagonal (B) sequential blocks are copied (via MatDuplicate) */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* share the row/column layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* matrix is still in hash-based insertion mode; no internal structure to copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* copy the block-size bookkeeping (block dimensions in units of blocks) */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    /* MatGetRow() scratch is per-matrix state; start fresh rather than copy */
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: keep the configuration (flag/factor) but not the table itself */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    /* a->rangebs was allocated when MatSetType() ran the MPIBAIJ constructor */
    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* duplicate the global column indices of the off-diagonal part, if any */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    /* cpvalues decides whether the numerical entries are copied */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. conversion routines) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3256 
/* Used for both MPIBAIJ and MPISBAIJ matrices.

   Reads a matrix stored in PETSc's binary (point-wise AIJ) format, computes a
   block-row preallocation for the current block size, and inserts the values
   row by row. Collective on the viewer's communicator. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global cols, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices share row/col sizes) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  /* local sizes in units of blocks */
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (CSR-style prefix sums) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths over all processes must sum to nz */
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count the distinct diagonal and off-diagonal block columns */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal block: count each block column at most once per block row */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls dispatch via composed methods; presumably only the one matching
       the actual matrix type (BAIJ vs SBAIJ) takes effect — the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one (point) row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3358 
3359 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3360 {
3361   PetscBool isbinary;
3362 
3363   PetscFunctionBegin;
3364   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3365   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3366   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3367   PetscFunctionReturn(PETSC_SUCCESS);
3368 }
3369 
3370 /*@
3371   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3372 
3373   Input Parameters:
3374 + mat  - the matrix
3375 - fact - factor
3376 
3377   Options Database Key:
3378 . -mat_use_hash_table <fact> - provide the factor
3379 
3380   Level: advanced
3381 
3382 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3383 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation; silently a no-op for
     matrix types that do not compose "MatSetHashTableFactor_C" */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3390 
3391 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3392 {
3393   Mat_MPIBAIJ *baij;
3394 
3395   PetscFunctionBegin;
3396   baij          = (Mat_MPIBAIJ *)mat->data;
3397   baij->ht_fact = fact;
3398   PetscFunctionReturn(PETSC_SUCCESS);
3399 }
3400 
3401 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3402 {
3403   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3404   PetscBool    flg;
3405 
3406   PetscFunctionBegin;
3407   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3408   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3409   if (Ad) *Ad = a->A;
3410   if (Ao) *Ao = a->B;
3411   if (colmap) *colmap = a->garray;
3412   PetscFunctionReturn(PETSC_SUCCESS);
3413 }
3414 
3415 /*
3416     Special version for direct calls from Fortran (to eliminate two function call overheads
3417 */
3418 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3419   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3420 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3421   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3422 #endif
3423 
3424 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3425 /*@C
3426   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3427 
3428   Collective
3429 
3430   Input Parameters:
3431 + matin  - the matrix
3432 . min    - number of input rows
3433 . im     - input rows
3434 . nin    - number of input columns
3435 . in     - input columns
3436 . v      - numerical values input
3437 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3438 
3439   Level: advanced
3440 
3441   Developer Notes:
3442   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3443 
3444 .seealso: `Mat`, `MatSetValuesBlocked()`
3445 @*/
/* Fortran-callable fast path that inlines MatSetValuesBlocked() for MPIBAIJ;
   all arguments arrive by pointer per Fortran convention */
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs x bs scratch block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized scratch block used to repack values */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block in v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block row index: skip */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the bs x bs block for (im[i], in[j]) into contiguous scratch */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          /* off-diagonal part: translate the global block column via the colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            /* colmap stores 1-based point columns; convert back to a block index */
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* unmapped column on a matrix that allows new nonzeros: the
               off-diagonal structure must be rebuilt (disassemble) */
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3553 
3554 /*@
3555   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3556 
3557   Collective
3558 
3559   Input Parameters:
3560 + comm - MPI communicator
3561 . bs   - the block size, only a block size of 1 is supported
3562 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3563 . n    - This value should be the same as the local size used in creating the
3564          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3565          calculated if `N` is given) For square matrices `n` is almost always `m`.
3566 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3567 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3568 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3569 . j    - column indices
3570 - a    - matrix values
3571 
3572   Output Parameter:
3573 . mat - the matrix
3574 
3575   Level: intermediate
3576 
3577   Notes:
3578   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3579   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3580   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3581 
3582   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3583   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3584   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3585   with column-major ordering within blocks.
3586 
3587   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3588 
3589 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3590           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3591 @*/
3592 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3593 {
3594   PetscFunctionBegin;
3595   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3596   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3597   PetscCall(MatCreate(comm, mat));
3598   PetscCall(MatSetSizes(*mat, m, n, M, N));
3599   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3600   PetscCall(MatSetBlockSize(*mat, bs));
3601   PetscCall(MatSetUp(*mat));
3602   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3603   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3604   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3605   PetscFunctionReturn(PETSC_SUCCESS);
3606 }
3607 
3608 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3609 {
3610   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3611   PetscInt    *indx;
3612   PetscScalar *values;
3613 
3614   PetscFunctionBegin;
3615   PetscCall(MatGetSize(inmat, &m, &N));
3616   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3617     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3618     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3619     PetscInt    *bindx, rmax = a->rmax, j;
3620     PetscMPIInt  rank, size;
3621 
3622     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3623     mbs = m / bs;
3624     Nbs = N / cbs;
3625     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3626     nbs = n / cbs;
3627 
3628     PetscCall(PetscMalloc1(rmax, &bindx));
3629     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3630 
3631     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3632     PetscCallMPI(MPI_Comm_rank(comm, &size));
3633     if (rank == size - 1) {
3634       /* Check sum(nbs) = Nbs */
3635       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3636     }
3637 
3638     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3639     for (i = 0; i < mbs; i++) {
3640       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3641       nnz = nnz / bs;
3642       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3643       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3644       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3645     }
3646     PetscCall(PetscFree(bindx));
3647 
3648     PetscCall(MatCreate(comm, outmat));
3649     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3650     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3651     PetscCall(MatSetType(*outmat, MATBAIJ));
3652     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3653     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3654     MatPreallocateEnd(dnz, onz);
3655     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3656   }
3657 
3658   /* numeric phase */
3659   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3660   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3661 
3662   for (i = 0; i < m; i++) {
3663     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3664     Ii = i + rstart;
3665     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3666     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3667   }
3668   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3669   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3670   PetscFunctionReturn(PETSC_SUCCESS);
3671 }
3672