/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3d1372b23971def3aed7e3dc12090948688700a0) */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
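
/*
   mpihashmat.h is shared by the MPI AIJ, BAIJ, and SBAIJ implementations: the
   TYPE macro defined just above selects which type-specific pieces get compiled
   into the generated MatSetValues_MPI_Hash()/MatAssemblyBegin_MPI_Hash()/
   MatAssemblyEnd_MPI_Hash() bodies. A sketch of the usage pattern (the SBAIJ
   usage is an assumption based on the shared location of the header):

     #define TYPE SBAIJ
     #include "../src/mat/impls/aij/mpi/mpihashmat.h"
     #undef TYPE
*/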

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
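  /* idxb[] from a->B holds column indices local to B; a->garray maps a local
     block column of B to its global block number, so the global column of the
     off-diagonal maximum in row i is bs * garray[idxb[i] / bs] + (idxb[i] % bs) */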
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
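
/*
   These two simply delegate to the sequential diagonal and off-diagonal blocks.
   The calling pattern they support is (a sketch; MatStoreValues() requires that
   the nonzero pattern be frozen first):

     PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(mat));    // snapshot the current numerical values
     // ... overwrite values, e.g. add a shift, factor, solve ...
     PetscCall(MatRetrieveValues(mat)); // restore the snapshot
*/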

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
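
/*
   A small worked example with hypothetical numbers: for bs = 2 and
   garray = {3, 7} the loop above records colmap[3] = 1 and colmap[7] = 3
   (shifted by one so that 0 can mean "block column not present on this
   process"). Looking up global column 15 computes block 15/2 = 7, reads
   colmap[7] - 1 = 2 as the first local column of that block, and adds
   15 % 2 = 1 to obtain local column 3 of B.
*/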

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
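
/*
   Both macros above use the same lookup strategy: a binary search narrows the
   window of block-column indices to at most three entries, a short linear scan
   finishes the search, and on a miss the new block is inserted in sorted
   position by shifting the tail of the row (growing the row's storage first
   through MatSeqXAIJReallocateAIJ() when it is full).
*/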

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
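          /* Off-diagonal entry: once the matrix has been assembled, B stores
             compressed (local) column indices, so translate the global column
             through the colmap; a miss means a brand-new off-process block,
             which forces disassembly back to global numbering unless new
             nonzeros are disallowed */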
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by passing additional stride information
    into MatSetValuesBlocked_SeqBAIJ_Inlined() so that the block copy at
    "Here a copy is required" below is not needed.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If there is only one block column (row-oriented) or one block row (column-oriented) then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
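
/*
   HASH() is multiplicative hashing with the fractional part of the golden
   ratio: the key is scaled by HASH_KEY, the integer part is dropped, and the
   remaining fraction is mapped onto the table. A worked example with
   hypothetical numbers: for size = 100 and key = 10, tmp = 6.18033..., the
   fractional part is 0.18033..., and the slot is (PetscInt)(100 * 0.18033...) = 18.
   Collisions are resolved by the linear probing in the routines below.
*/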
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up the block in the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up the block in the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

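/*
   The norms computed below are the standard definitions, evaluated over the
   blocked storage:

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)   (local partial sums, then MPI sum)
     NORM_1:         max_j sum_i |a_ij|      (max column sum, accumulated in a vector)
     NORM_INFINITY:  max_i sum_j |a_ij|      (max row sum, then MPI max)
*/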
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      Vec          col, bcol;
      PetscScalar *array;
      PetscInt    *jj, *garray = baij->garray;

      PetscCall(MatCreateVecs(mat, &col, NULL));
      PetscCall(VecSet(col, 0.0));
      PetscCall(VecGetArrayWrite(col, &array));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecRestoreArrayWrite(col, &array));
      PetscCall(MatCreateVecs(baij->B, &bcol, NULL));
      PetscCall(VecSet(bcol, 0.0));
      PetscCall(VecGetArrayWrite(bcol, &array));
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES));
      PetscCall(VecRestoreArrayWrite(bcol, &array));
      PetscCall(VecDestroy(&bcol));
      PetscCall(VecAssemblyBegin(col));
      PetscCall(VecAssemblyEnd(col));
      PetscCall(VecNorm(col, NORM_INFINITY, nrm));
      PetscCall(VecDestroy(&col));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and fills the hash table used by the _HT MatSetValues variants.
  The table is created only once; if new entries need to be added to the
  matrix, the table has to be destroyed and recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
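
/*
   A typical way to reach this code path (a sketch; these are the public
   interfaces to the ht_flag and ht_fact fields used above):

     PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
     PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));

   after which MatAssemblyEnd_MPIBAIJ() below builds the table and switches
   mat->ops->setvalues to the _HT variants defined earlier in this file.
*/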

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags and restore the original flags
       after the MatSetValues() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* Determine if any process has disassembled; if so we must also disassemble
     ourselves so that we can reassemble. If the nonzero structure of the
     off-diagonal submatrix B cannot change, then no process disassembled and
     this step can be skipped. */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;
    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should instead use the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
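
/*
   The file written above is the standard PETSc binary matrix format: a
   four-entry PETSC_INT header (MAT_FILE_CLASSID, M, N, global nonzero count),
   the m local row lengths, the nz column indices, and the nz PETSC_SCALAR
   values. Each row is emitted with its off-diagonal entries left of the
   diagonal block first, then the diagonal block, then the remaining
   off-diagonal entries, so columns appear in globally increasing order.
*/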

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

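/*
   MatMult and the variants below share one pattern: start the scatter of the
   needed off-process (ghost) entries of xx into a->lvec, overlap that
   communication with the multiply by the diagonal block A, finish the scatter,
   and finally apply the off-diagonal block B to the ghost values, accumulating
   into the result.
*/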
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
1398   PetscCall(MatZeroEntries(l->B));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
1402 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1403 {
1404   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1405   Mat            A = a->A, B = a->B;
1406   PetscLogDouble isend[5], irecv[5];
1407 
1408   PetscFunctionBegin;
1409   info->block_size = (PetscReal)matin->rmap->bs;
1410 
1411   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1412 
1413   isend[0] = info->nz_used;
1414   isend[1] = info->nz_allocated;
1415   isend[2] = info->nz_unneeded;
1416   isend[3] = info->memory;
1417   isend[4] = info->mallocs;
1418 
1419   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1420 
1421   isend[0] += info->nz_used;
1422   isend[1] += info->nz_allocated;
1423   isend[2] += info->nz_unneeded;
1424   isend[3] += info->memory;
1425   isend[4] += info->mallocs;
1426 
1427   if (flag == MAT_LOCAL) {
1428     info->nz_used      = isend[0];
1429     info->nz_allocated = isend[1];
1430     info->nz_unneeded  = isend[2];
1431     info->memory       = isend[3];
1432     info->mallocs      = isend[4];
1433   } else if (flag == MAT_GLOBAL_MAX) {
1434     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1435 
1436     info->nz_used      = irecv[0];
1437     info->nz_allocated = irecv[1];
1438     info->nz_unneeded  = irecv[2];
1439     info->memory       = irecv[3];
1440     info->mallocs      = irecv[4];
1441   } else if (flag == MAT_GLOBAL_SUM) {
1442     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1443 
1444     info->nz_used      = irecv[0];
1445     info->nz_allocated = irecv[1];
1446     info->nz_unneeded  = irecv[2];
1447     info->memory       = irecv[3];
1448     info->mallocs      = irecv[4];
1449   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1450   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1451   info->fill_ratio_needed = 0;
1452   info->factor_mallocs    = 0;
1453   PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455 
1456 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1457 {
1458   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1459 
1460   PetscFunctionBegin;
1461   switch (op) {
1462   case MAT_NEW_NONZERO_LOCATIONS:
1463   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1464   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1465   case MAT_KEEP_NONZERO_PATTERN:
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     MatCheckPreallocated(A, 1);
1468     PetscCall(MatSetOption(a->A, op, flg));
1469     PetscCall(MatSetOption(a->B, op, flg));
1470     break;
1471   case MAT_ROW_ORIENTED:
1472     MatCheckPreallocated(A, 1);
1473     a->roworiented = flg;
1474 
1475     PetscCall(MatSetOption(a->A, op, flg));
1476     PetscCall(MatSetOption(a->B, op, flg));
1477     break;
1478   case MAT_IGNORE_OFF_PROC_ENTRIES:
1479     a->donotstash = flg;
1480     break;
1481   case MAT_USE_HASH_TABLE:
1482     a->ht_flag = flg;
1483     a->ht_fact = 1.39;
1484     break;
1485   case MAT_SYMMETRIC:
1486   case MAT_STRUCTURALLY_SYMMETRIC:
1487   case MAT_HERMITIAN:
1488   case MAT_SYMMETRY_ETERNAL:
1489   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1490   case MAT_SPD_ETERNAL:
1491     /* if the diagonal block is square it inherits some of the properties above */
1492     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1493     break;
1494   default:
1495     break;
1496   }
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
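/* Build the transpose by scattering every local block into a new matrix with swapped layouts, inserting one block column (bs values) at a time with MatSetValues_MPIBAIJ() */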
1500 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1501 {
1502   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1503   Mat_SeqBAIJ *Aloc;
1504   Mat          B;
1505   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1506   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1507   MatScalar   *a;
1508 
1509   PetscFunctionBegin;
1510   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1511   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1512     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1513     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1514     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1515     /* Do not know preallocation information, but must set block size */
1516     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1517   } else {
1518     B = *matout;
1519   }
1520 
1521   /* copy over the A part */
1522   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1523   ai   = Aloc->i;
1524   aj   = Aloc->j;
1525   a    = Aloc->a;
1526   PetscCall(PetscMalloc1(bs, &rvals));
1527 
1528   for (i = 0; i < mbs; i++) {
1529     rvals[0] = bs * (baij->rstartbs + i);
1530     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1531     for (j = ai[i]; j < ai[i + 1]; j++) {
1532       col = (baij->cstartbs + aj[j]) * bs;
1533       for (k = 0; k < bs; k++) {
1534         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1535 
1536         col++;
1537         a += bs;
1538       }
1539     }
1540   }
1541   /* copy over the B part */
1542   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1543   ai   = Aloc->i;
1544   aj   = Aloc->j;
1545   a    = Aloc->a;
1546   for (i = 0; i < mbs; i++) {
1547     rvals[0] = bs * (baij->rstartbs + i);
1548     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1549     for (j = ai[i]; j < ai[i + 1]; j++) {
1550       col = baij->garray[aj[j]] * bs;
1551       for (k = 0; k < bs; k++) {
1552         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1553         col++;
1554         a += bs;
1555       }
1556     }
1557   }
1558   PetscCall(PetscFree(rvals));
1559   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1560   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1561 
1562   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1563   else PetscCall(MatHeaderMerge(A, &B));
1564   PetscFunctionReturn(PETSC_SUCCESS);
1565 }
1566 
1567 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1568 {
1569   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1570   Mat          a = baij->A, b = baij->B;
1571   PetscInt     s1, s2, s3;
1572 
1573   PetscFunctionBegin;
1574   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1575   if (rr) {
1576     PetscCall(VecGetLocalSize(rr, &s1));
1577     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1578     /* Overlap communication with computation. */
1579     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1580   }
1581   if (ll) {
1582     PetscCall(VecGetLocalSize(ll, &s1));
1583     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1584     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1585   }
1586   /* scale the diagonal block */
1587   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1588 
1589   if (rr) {
1590     /* Do a scatter end and then right scale the off-diagonal block */
1591     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1592     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1593   }
1594   PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596 
1597 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1598 {
1599   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1600   PetscInt    *lrows;
1601   PetscInt     r, len;
1602   PetscBool    cong;
1603 
1604   PetscFunctionBegin;
1605   /* get locally owned rows */
1606   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1607   /* fix right-hand side if needed */
1608   if (x && b) {
1609     const PetscScalar *xx;
1610     PetscScalar       *bb;
1611 
1612     PetscCall(VecGetArrayRead(x, &xx));
1613     PetscCall(VecGetArray(b, &bb));
1614     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1615     PetscCall(VecRestoreArrayRead(x, &xx));
1616     PetscCall(VecRestoreArray(b, &bb));
1617   }
1618 
1619   /* actually zap the local rows */
1620   /*
1621      Zero the required rows. If the "diagonal block" of the matrix is square
1622      and the user wishes to set the diagonal, we use separate code so that
1623      MatSetValues() is not called for each diagonal entry, which would allocate
1624      new memory on every insertion and slow things down with many mallocs.
1625   */
1626   /* must zero l->B before l->A because the (diag != 0.0) case below
1627      may put values into l->B */
1628   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1629   PetscCall(MatHasCongruentLayouts(A, &cong));
1630   if ((diag != 0.0) && cong) {
1631     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1632   } else if (diag != 0.0) {
1633     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1634     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1635     for (r = 0; r < len; ++r) {
1636       const PetscInt row = lrows[r] + A->rmap->rstart;
1637       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1638     }
1639     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1640     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1641   } else {
1642     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1643   }
1644   PetscCall(PetscFree(lrows));
1645 
1646   /* only change matrix nonzero state if pattern was allowed to be changed */
1647   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1648     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1649     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1650   }
1651   PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653 
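/* Zero rows and columns: locate locally owned rows with a PetscSF reduction, let MatZeroRowsColumns() handle the diagonal block,
   and find the zeroed columns of the off-diagonal block through a scattered mask vector */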
1654 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1655 {
1656   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1657   PetscMPIInt        n, p = 0;
1658   PetscInt           i, j, k, r, len = 0, row, col, count;
1659   PetscInt          *lrows, *owners = A->rmap->range;
1660   PetscSFNode       *rrows;
1661   PetscSF            sf;
1662   const PetscScalar *xx;
1663   PetscScalar       *bb, *mask;
1664   Vec                xmask, lmask;
1665   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1666   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1667   PetscScalar       *aa;
1668 
1669   PetscFunctionBegin;
1670   PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1671   /* Create SF where leaves are input rows and roots are owned rows */
1672   PetscCall(PetscMalloc1(n, &lrows));
1673   for (r = 0; r < n; ++r) lrows[r] = -1;
1674   PetscCall(PetscMalloc1(N, &rrows));
1675   for (r = 0; r < N; ++r) {
1676     const PetscInt idx = rows[r];
1677     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1678     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1679       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1680     }
1681     rrows[r].rank  = p;
1682     rrows[r].index = rows[r] - owners[p];
1683   }
1684   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1685   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1686   /* Collect flags for rows to be zeroed */
1687   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1688   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1689   PetscCall(PetscSFDestroy(&sf));
1690   /* Compress and put in row numbers */
1691   for (r = 0; r < n; ++r)
1692     if (lrows[r] >= 0) lrows[len++] = r;
1693   /* zero diagonal part of matrix */
1694   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1695   /* handle off-diagonal part of matrix */
1696   PetscCall(MatCreateVecs(A, &xmask, NULL));
1697   PetscCall(VecDuplicate(l->lvec, &lmask));
1698   PetscCall(VecGetArray(xmask, &bb));
1699   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1700   PetscCall(VecRestoreArray(xmask, &bb));
1701   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1702   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1703   PetscCall(VecDestroy(&xmask));
1704   if (x) {
1705     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1706     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1707     PetscCall(VecGetArrayRead(l->lvec, &xx));
1708     PetscCall(VecGetArray(b, &bb));
1709   }
1710   PetscCall(VecGetArray(lmask, &mask));
1711   /* remove zeroed rows of off-diagonal matrix */
1712   for (i = 0; i < len; ++i) {
1713     row   = lrows[i];
1714     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1715     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
1716     for (k = 0; k < count; ++k) {
1717       aa[0] = 0.0;
1718       aa += bs;
1719     }
1720   }
1721   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1722   for (i = 0; i < l->B->rmap->N; ++i) {
1723     row = i / bs;
1724     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1725       for (k = 0; k < bs; ++k) {
1726         col = bs * baij->j[j] + k;
1727         if (PetscAbsScalar(mask[col])) {
1728           aa = baij->a + j * bs2 + (i % bs) + bs * k;
1729           if (x) bb[i] -= aa[0] * xx[col];
1730           aa[0] = 0.0;
1731         }
1732       }
1733     }
1734   }
1735   if (x) {
1736     PetscCall(VecRestoreArray(b, &bb));
1737     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1738   }
1739   PetscCall(VecRestoreArray(lmask, &mask));
1740   PetscCall(VecDestroy(&lmask));
1741   PetscCall(PetscFree(lrows));
1742 
1743   /* only change matrix nonzero state if pattern was allowed to be changed */
1744   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1745     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1746     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1747   }
1748   PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750 
1751 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1752 {
1753   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1754 
1755   PetscFunctionBegin;
1756   PetscCall(MatSetUnfactored(a->A));
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1761 
1762 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1763 {
1764   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1765   Mat          a, b, c, d;
1766   PetscBool    flg;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A;
1770   b = matA->B;
1771   c = matB->A;
1772   d = matB->B;
1773 
1774   PetscCall(MatEqual(a, c, &flg));
1775   if (flg) PetscCall(MatEqual(b, d, &flg));
1776   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1777   PetscFunctionReturn(PETSC_SUCCESS);
1778 }
1779 
1780 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1781 {
1782   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1783   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1784 
1785   PetscFunctionBegin;
1786   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1787   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1788     PetscCall(MatCopy_Basic(A, B, str));
1789   } else {
1790     PetscCall(MatCopy(a->A, b->A, str));
1791     PetscCall(MatCopy(a->B, b->B, str));
1792   }
1793   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1798 {
1799   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1800   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1801   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
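/* Y = Y + a*X: when the nonzero patterns match, the block values are combined directly with BLAS axpy; a subset pattern uses
   MatAXPY_Basic(), and in the general case a matrix with the merged pattern is preallocated first */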
1808 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1809 {
1810   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1811   PetscBLASInt bnz, one                         = 1;
1812   Mat_SeqBAIJ *x, *y;
1813   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1814 
1815   PetscFunctionBegin;
1816   if (str == SAME_NONZERO_PATTERN) {
1817     PetscScalar alpha = a;
1818     x                 = (Mat_SeqBAIJ *)xx->A->data;
1819     y                 = (Mat_SeqBAIJ *)yy->A->data;
1820     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1821     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1822     x = (Mat_SeqBAIJ *)xx->B->data;
1823     y = (Mat_SeqBAIJ *)yy->B->data;
1824     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1825     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1826     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1827   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1828     PetscCall(MatAXPY_Basic(Y, a, X, str));
1829   } else {
1830     Mat       B;
1831     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1832     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1833     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1834     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1835     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1836     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1837     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1838     PetscCall(MatSetType(B, MATMPIBAIJ));
1839     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1840     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1841     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1842     /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1! */
1843     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1844     PetscCall(MatHeaderMerge(Y, &B));
1845     PetscCall(PetscFree(nnz_d));
1846     PetscCall(PetscFree(nnz_o));
1847   }
1848   PetscFunctionReturn(PETSC_SUCCESS);
1849 }
1850 
1851 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1852 {
1853   PetscFunctionBegin;
1854   if (PetscDefined(USE_COMPLEX)) {
1855     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1856 
1857     PetscCall(MatConjugate_SeqBAIJ(a->A));
1858     PetscCall(MatConjugate_SeqBAIJ(a->B));
1859   }
1860   PetscFunctionReturn(PETSC_SUCCESS);
1861 }
1862 
1863 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1864 {
1865   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1866 
1867   PetscFunctionBegin;
1868   PetscCall(MatRealPart(a->A));
1869   PetscCall(MatRealPart(a->B));
1870   PetscFunctionReturn(PETSC_SUCCESS);
1871 }
1872 
1873 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1874 {
1875   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1876 
1877   PetscFunctionBegin;
1878   PetscCall(MatImaginaryPart(a->A));
1879   PetscCall(MatImaginaryPart(a->B));
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1884 {
1885   IS       iscol_local;
1886   PetscInt csize;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(ISGetLocalSize(iscol, &csize));
1890   if (call == MAT_REUSE_MATRIX) {
1891     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1892     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1893   } else {
1894     PetscCall(ISAllGather(iscol, &iscol_local));
1895   }
1896   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1897   if (call == MAT_INITIAL_MATRIX) {
1898     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1899     PetscCall(ISDestroy(&iscol_local));
1900   }
1901   PetscFunctionReturn(PETSC_SUCCESS);
1902 }
1903 
1904 /*
1905   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
1906   and then the end result obtained by concatenating the local matrices.
1907   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1908   This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
1909 */
1910 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1911 {
1912   PetscMPIInt  rank, size;
1913   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1914   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1915   Mat          M, Mreuse;
1916   MatScalar   *vwork, *aa;
1917   MPI_Comm     comm;
1918   IS           isrow_new, iscol_new;
1919   Mat_SeqBAIJ *aij;
1920 
1921   PetscFunctionBegin;
1922   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1923   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1924   PetscCallMPI(MPI_Comm_size(comm, &size));
1925   /* The compression and expansion should be avoided. It does not flag
1926      errors, and it may change the indices, hence it is buggy */
1927   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1928   if (isrow == iscol) {
1929     iscol_new = isrow_new;
1930     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1931   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1932 
1933   if (call == MAT_REUSE_MATRIX) {
1934     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1935     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1936     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1937   } else {
1938     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1939   }
1940   PetscCall(ISDestroy(&isrow_new));
1941   PetscCall(ISDestroy(&iscol_new));
1942   /*
1943       m - number of local rows
1944       n - number of columns (same on all processors)
1945       rstart - first row in new global matrix generated
1946   */
1947   PetscCall(MatGetBlockSize(mat, &bs));
1948   PetscCall(MatGetSize(Mreuse, &m, &n));
1949   m = m / bs;
1950   n = n / bs;
1951 
1952   if (call == MAT_INITIAL_MATRIX) {
1953     aij = (Mat_SeqBAIJ *)Mreuse->data;
1954     ii  = aij->i;
1955     jj  = aij->j;
1956 
1957     /*
1958         Determine the number of non-zeros in the diagonal and off-diagonal
1959         portions of the matrix in order to do correct preallocation
1960     */
1961 
1962     /* first get start and end of "diagonal" columns */
1963     if (csize == PETSC_DECIDE) {
1964       PetscCall(ISGetSize(isrow, &mglobal));
1965       if (mglobal == n * bs) { /* square matrix */
1966         nlocal = m;
1967       } else {
1968         nlocal = n / size + ((n % size) > rank);
1969       }
1970     } else {
1971       nlocal = csize / bs;
1972     }
1973     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1974     rstart = rend - nlocal;
1975     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1976 
1977     /* next, compute all the lengths */
1978     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1979     for (i = 0; i < m; i++) {
1980       jend = ii[i + 1] - ii[i];
1981       olen = 0;
1982       dlen = 0;
1983       for (j = 0; j < jend; j++) {
1984         if (*jj < rstart || *jj >= rend) olen++;
1985         else dlen++;
1986         jj++;
1987       }
1988       olens[i] = olen;
1989       dlens[i] = dlen;
1990     }
1991     PetscCall(MatCreate(comm, &M));
1992     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1993     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1994     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1995     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1996     PetscCall(PetscFree2(dlens, olens));
1997   } else {
1998     PetscInt ml, nl;
1999 
2000     M = *newmat;
2001     PetscCall(MatGetLocalSize(M, &ml, &nl));
2002     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2003     PetscCall(MatZeroEntries(M));
2004     /*
2005        The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2006        rather than the slower MatSetValues().
2007     */
2008     M->was_assembled = PETSC_TRUE;
2009     M->assembled     = PETSC_FALSE;
2010   }
2011   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2012   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2013   aij = (Mat_SeqBAIJ *)Mreuse->data;
2014   ii  = aij->i;
2015   jj  = aij->j;
2016   aa  = aij->a;
2017   for (i = 0; i < m; i++) {
2018     row   = rstart / bs + i;
2019     nz    = ii[i + 1] - ii[i];
2020     cwork = jj;
2021     jj    = PetscSafePointerPlusOffset(jj, nz);
2022     vwork = aa;
2023     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2024     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2025   }
2026 
2027   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2028   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2029   *newmat = M;
2030 
2031   /* save submatrix used in processor for next request */
2032   if (call == MAT_INITIAL_MATRIX) {
2033     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2034     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2035   }
2036   PetscFunctionReturn(PETSC_SUCCESS);
2037 }
2038 
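/* Permute rows and columns by converting the permutations into suitable index sets and extracting the permuted matrix as a submatrix */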
2039 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2040 {
2041   MPI_Comm        comm, pcomm;
2042   PetscInt        clocal_size, nrows;
2043   const PetscInt *rows;
2044   PetscMPIInt     size;
2045   IS              crowp, lcolp;
2046 
2047   PetscFunctionBegin;
2048   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2049   /* make a collective version of 'rowp' */
2050   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2051   if (pcomm == comm) {
2052     crowp = rowp;
2053   } else {
2054     PetscCall(ISGetSize(rowp, &nrows));
2055     PetscCall(ISGetIndices(rowp, &rows));
2056     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2057     PetscCall(ISRestoreIndices(rowp, &rows));
2058   }
2059   PetscCall(ISSetPermutation(crowp));
2060   /* make a local version of 'colp' */
2061   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2062   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2063   if (size == 1) {
2064     lcolp = colp;
2065   } else {
2066     PetscCall(ISAllGather(colp, &lcolp));
2067   }
2068   PetscCall(ISSetPermutation(lcolp));
2069   /* now we just get the submatrix */
2070   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2071   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2072   /* clean up */
2073   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2074   if (size > 1) PetscCall(ISDestroy(&lcolp));
2075   PetscFunctionReturn(PETSC_SUCCESS);
2076 }
2077 
2078 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2079 {
2080   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2081   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2082 
2083   PetscFunctionBegin;
2084   if (nghosts) *nghosts = B->nbs;
2085   if (ghosts) *ghosts = baij->garray;
2086   PetscFunctionReturn(PETSC_SUCCESS);
2087 }
2088 
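/* Gather the block-row nonzero structure of the whole parallel matrix onto every process as a sequential AIJ matrix;
   only the pattern is meaningful, the numerical values are never filled in */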
2089 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2090 {
2091   Mat          B;
2092   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2093   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2094   Mat_SeqAIJ  *b;
2095   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2096   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2097   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2098 
2099   PetscFunctionBegin;
2100   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2101   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2102 
2103   /*   Tell every processor the number of nonzeros per row  */
2104   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2105   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2106   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2107   displs = recvcounts + size;
2108   for (i = 0; i < size; i++) {
2109     PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2110     PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2111   }
2112   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2113   /* Create the sequential AIJ matrix that will hold the complete block nonzero structure */
2114   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2115   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2116   PetscCall(MatSetType(B, MATSEQAIJ));
2117   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2118   b = (Mat_SeqAIJ *)B->data;
2119 
2120   /*     Copy my part of matrix column indices over  */
2121   sendcount  = ad->nz + bd->nz;
2122   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2123   a_jsendbuf = ad->j;
2124   b_jsendbuf = bd->j;
2125   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2126   cnt        = 0;
2127   for (i = 0; i < n; i++) {
2128     /* put in lower diagonal portion */
2129     m = bd->i[i + 1] - bd->i[i];
2130     while (m > 0) {
2131       /* is it above diagonal (in bd (compressed) numbering) */
2132       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2133       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2134       m--;
2135     }
2136 
2137     /* put in diagonal portion */
2138     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2139 
2140     /* put in upper diagonal portion */
2141     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2142   }
2143   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2144 
2145   /*  Gather all column indices to all processors  */
2146   for (i = 0; i < size; i++) {
2147     recvcounts[i] = 0;
2148     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2149   }
2150   displs[0] = 0;
2151   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2152   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2153   /* Assemble the matrix into usable form (note that the numerical values are not yet set) */
2154   /* set the b->ilen (length of each row) values */
2155   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2156   /* set the b->i indices */
2157   b->i[0] = 0;
2158   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2159   PetscCall(PetscFree(lens));
2160   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2161   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2162   PetscCall(PetscFree(recvcounts));
2163 
2164   PetscCall(MatPropagateSymmetryOptions(A, B));
2165   *newmat = B;
2166   PetscFunctionReturn(PETSC_SUCCESS);
2167 }
2168 
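/* Process-local SOR: each outer iteration updates the right-hand side with the off-process coupling (bb1 = bb - B*x)
   and then runs the requested sweep on the diagonal block */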
2169 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2170 {
2171   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2172   Vec          bb1 = NULL;
2173 
2174   PetscFunctionBegin;
2175   if (flag == SOR_APPLY_UPPER) {
2176     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2177     PetscFunctionReturn(PETSC_SUCCESS);
2178   }
2179 
2180   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2181 
2182   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2183     if (flag & SOR_ZERO_INITIAL_GUESS) {
2184       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2185       its--;
2186     }
2187 
2188     while (its--) {
2189       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2190       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2191 
2192       /* update rhs: bb1 = bb - B*x */
2193       PetscCall(VecScale(mat->lvec, -1.0));
2194       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2195 
2196       /* local sweep */
2197       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2198     }
2199   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2200     if (flag & SOR_ZERO_INITIAL_GUESS) {
2201       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2202       its--;
2203     }
2204     while (its--) {
2205       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2206       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2207 
2208       /* update rhs: bb1 = bb - B*x */
2209       PetscCall(VecScale(mat->lvec, -1.0));
2210       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2211 
2212       /* local sweep */
2213       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2214     }
2215   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2216     if (flag & SOR_ZERO_INITIAL_GUESS) {
2217       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2218       its--;
2219     }
2220     while (its--) {
2221       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2222       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2223 
2224       /* update rhs: bb1 = bb - B*x */
2225       PetscCall(VecScale(mat->lvec, -1.0));
2226       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2227 
2228       /* local sweep */
2229       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2230     }
2231   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR variant not supported");
2232 
2233   PetscCall(VecDestroy(&bb1));
2234   PetscFunctionReturn(PETSC_SUCCESS);
2235 }
2236 
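/* Accumulate the requested per-column reduction over the diagonal and off-diagonal blocks (mapping off-diagonal
   block columns to global columns through garray), then combine the results across processes */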
2237 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2238 {
2239   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2240   PetscInt     m, N, i, *garray = aij->garray;
2241   PetscInt     ib, jb, bs = A->rmap->bs;
2242   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2243   MatScalar   *a_val = a_aij->a;
2244   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2245   MatScalar   *b_val = b_aij->a;
2246   PetscReal   *work;
2247 
2248   PetscFunctionBegin;
2249   PetscCall(MatGetSize(A, &m, &N));
2250   PetscCall(PetscCalloc1(N, &work));
2251   if (type == NORM_2) {
2252     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2253       for (jb = 0; jb < bs; jb++) {
2254         for (ib = 0; ib < bs; ib++) {
2255           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2256           a_val++;
2257         }
2258       }
2259     }
2260     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2261       for (jb = 0; jb < bs; jb++) {
2262         for (ib = 0; ib < bs; ib++) {
2263           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2264           b_val++;
2265         }
2266       }
2267     }
2268   } else if (type == NORM_1) {
2269     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2270       for (jb = 0; jb < bs; jb++) {
2271         for (ib = 0; ib < bs; ib++) {
2272           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2273           a_val++;
2274         }
2275       }
2276     }
2277     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2278       for (jb = 0; jb < bs; jb++) {
2279         for (ib = 0; ib < bs; ib++) {
2280           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2281           b_val++;
2282         }
2283       }
2284     }
2285   } else if (type == NORM_INFINITY) {
2286     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2287       for (jb = 0; jb < bs; jb++) {
2288         for (ib = 0; ib < bs; ib++) {
2289           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2290           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2291           a_val++;
2292         }
2293       }
2294     }
2295     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2296       for (jb = 0; jb < bs; jb++) {
2297         for (ib = 0; ib < bs; ib++) {
2298           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2299           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2300           b_val++;
2301         }
2302       }
2303     }
2304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2305     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2306       for (jb = 0; jb < bs; jb++) {
2307         for (ib = 0; ib < bs; ib++) {
2308           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2309           a_val++;
2310         }
2311       }
2312     }
2313     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2314       for (jb = 0; jb < bs; jb++) {
2315         for (ib = 0; ib < bs; ib++) {
2316           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2317           b_val++;
2318         }
2319       }
2320     }
2321   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2322     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2323       for (jb = 0; jb < bs; jb++) {
2324         for (ib = 0; ib < bs; ib++) {
2325           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2326           a_val++;
2327         }
2328       }
2329     }
2330     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2331       for (jb = 0; jb < bs; jb++) {
2332         for (ib = 0; ib < bs; ib++) {
2333           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2334           b_val++;
2335         }
2336       }
2337     }
2338   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2339   if (type == NORM_INFINITY) {
2340     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2341   } else {
2342     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2343   }
2344   PetscCall(PetscFree(work));
2345   if (type == NORM_2) {
2346     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2347   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2348     for (i = 0; i < N; i++) reductions[i] /= m;
2349   }
2350   PetscFunctionReturn(PETSC_SUCCESS);
2351 }
2352 
2353 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2354 {
2355   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2356 
2357   PetscFunctionBegin;
2358   PetscCall(MatInvertBlockDiagonal(a->A, values));
2359   A->factorerrortype             = a->A->factorerrortype;
2360   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2361   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2362   PetscFunctionReturn(PETSC_SUCCESS);
2363 }
2364 
2365 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2366 {
2367   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2368   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2369 
2370   PetscFunctionBegin;
2371   if (!Y->preallocated) {
2372     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2373   } else if (!aij->nz) {
2374     PetscInt nonew = aij->nonew;
2375     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2376     aij->nonew = nonew;
2377   }
2378   PetscCall(MatShift_Basic(Y, a));
2379   PetscFunctionReturn(PETSC_SUCCESS);
2380 }
2381 
2382 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2383 {
2384   PetscFunctionBegin;
2385   *a = ((Mat_MPIBAIJ *)A->data)->A;
2386   PetscFunctionReturn(PETSC_SUCCESS);
2387 }
2388 
2389 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2390 {
2391   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2392 
2393   PetscFunctionBegin;
2394   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2395   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2396   PetscFunctionReturn(PETSC_SUCCESS);
2397 }
2398 
2399 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2400                                        MatGetRow_MPIBAIJ,
2401                                        MatRestoreRow_MPIBAIJ,
2402                                        MatMult_MPIBAIJ,
2403                                        /* 4*/ MatMultAdd_MPIBAIJ,
2404                                        MatMultTranspose_MPIBAIJ,
2405                                        MatMultTransposeAdd_MPIBAIJ,
2406                                        NULL,
2407                                        NULL,
2408                                        NULL,
2409                                        /*10*/ NULL,
2410                                        NULL,
2411                                        NULL,
2412                                        MatSOR_MPIBAIJ,
2413                                        MatTranspose_MPIBAIJ,
2414                                        /*15*/ MatGetInfo_MPIBAIJ,
2415                                        MatEqual_MPIBAIJ,
2416                                        MatGetDiagonal_MPIBAIJ,
2417                                        MatDiagonalScale_MPIBAIJ,
2418                                        MatNorm_MPIBAIJ,
2419                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2420                                        MatAssemblyEnd_MPIBAIJ,
2421                                        MatSetOption_MPIBAIJ,
2422                                        MatZeroEntries_MPIBAIJ,
2423                                        /*24*/ MatZeroRows_MPIBAIJ,
2424                                        NULL,
2425                                        NULL,
2426                                        NULL,
2427                                        NULL,
2428                                        /*29*/ MatSetUp_MPI_Hash,
2429                                        NULL,
2430                                        NULL,
2431                                        MatGetDiagonalBlock_MPIBAIJ,
2432                                        NULL,
2433                                        /*34*/ MatDuplicate_MPIBAIJ,
2434                                        NULL,
2435                                        NULL,
2436                                        NULL,
2437                                        NULL,
2438                                        /*39*/ MatAXPY_MPIBAIJ,
2439                                        MatCreateSubMatrices_MPIBAIJ,
2440                                        MatIncreaseOverlap_MPIBAIJ,
2441                                        MatGetValues_MPIBAIJ,
2442                                        MatCopy_MPIBAIJ,
2443                                        /*44*/ NULL,
2444                                        MatScale_MPIBAIJ,
2445                                        MatShift_MPIBAIJ,
2446                                        NULL,
2447                                        MatZeroRowsColumns_MPIBAIJ,
2448                                        /*49*/ NULL,
2449                                        NULL,
2450                                        NULL,
2451                                        NULL,
2452                                        NULL,
2453                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2454                                        NULL,
2455                                        MatSetUnfactored_MPIBAIJ,
2456                                        MatPermute_MPIBAIJ,
2457                                        MatSetValuesBlocked_MPIBAIJ,
2458                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2459                                        MatDestroy_MPIBAIJ,
2460                                        MatView_MPIBAIJ,
2461                                        NULL,
2462                                        NULL,
2463                                        /*64*/ NULL,
2464                                        NULL,
2465                                        NULL,
2466                                        NULL,
2467                                        MatGetRowMaxAbs_MPIBAIJ,
2468                                        /*69*/ NULL,
2469                                        NULL,
2470                                        NULL,
2471                                        MatFDColoringApply_BAIJ,
2472                                        NULL,
2473                                        /*74*/ NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        NULL,
2477                                        MatLoad_MPIBAIJ,
2478                                        /*79*/ NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        /*84*/ NULL,
2484                                        NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        /*89*/ NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        MatConjugate_MPIBAIJ,
2493                                        /*94*/ NULL,
2494                                        NULL,
2495                                        MatRealPart_MPIBAIJ,
2496                                        MatImaginaryPart_MPIBAIJ,
2497                                        NULL,
2498                                        /*99*/ NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2504                                        NULL,
2505                                        MatGetGhosts_MPIBAIJ,
2506                                        NULL,
2507                                        NULL,
2508                                        /*109*/ NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        MatGetMultiProcBlock_MPIBAIJ,
2513                                        /*114*/ NULL,
2514                                        MatGetColumnReductions_MPIBAIJ,
2515                                        MatInvertBlockDiagonal_MPIBAIJ,
2516                                        NULL,
2517                                        NULL,
2518                                        /*119*/ NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        /*124*/ NULL,
2524                                        NULL,
2525                                        MatSetBlockSizes_Default,
2526                                        NULL,
2527                                        MatFDColoringSetUp_MPIXAIJ,
2528                                        /*129*/ NULL,
2529                                        MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2530                                        NULL,
2531                                        NULL,
2532                                        NULL,
2533                                        /*134*/ NULL,
2534                                        NULL,
2535                                        MatEliminateZeros_MPIBAIJ,
2536                                        MatGetRowSumAbs_MPIBAIJ,
2537                                        NULL,
2538                                        /*139*/ NULL,
2539                                        NULL,
2540                                        MatCopyHashToXAIJ_MPI_Hash,
2541                                        NULL,
2542                                        NULL};
2543 
2544 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2545 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2546 
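/* Two passes over the CSR structure: first count the diagonal/off-diagonal block columns of each block row for preallocation,
   then insert the blocks with MatSetValuesBlocked_MPIBAIJ() */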
2547 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2548 {
2549   PetscInt        m, rstart, cstart, cend;
2550   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2551   const PetscInt *JJ          = NULL;
2552   PetscScalar    *values      = NULL;
2553   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2554   PetscBool       nooffprocentries;
2555 
2556   PetscFunctionBegin;
2557   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2558   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2559   PetscCall(PetscLayoutSetUp(B->rmap));
2560   PetscCall(PetscLayoutSetUp(B->cmap));
2561   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2562   m      = B->rmap->n / bs;
2563   rstart = B->rmap->rstart / bs;
2564   cstart = B->cmap->rstart / bs;
2565   cend   = B->cmap->rend / bs;
2566 
2567   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2568   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2569   for (i = 0; i < m; i++) {
2570     nz = ii[i + 1] - ii[i];
2571     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2572     nz_max = PetscMax(nz_max, nz);
2573     dlen   = 0;
2574     olen   = 0;
2575     JJ     = jj + ii[i];
2576     for (j = 0; j < nz; j++) {
2577       if (*JJ < cstart || *JJ >= cend) olen++;
2578       else dlen++;
2579       JJ++;
2580     }
2581     d_nnz[i] = dlen;
2582     o_nnz[i] = olen;
2583   }
2584   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2585   PetscCall(PetscFree2(d_nnz, o_nnz));
2586 
2587   values = (PetscScalar *)V;
2588   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2589   for (i = 0; i < m; i++) {
2590     PetscInt        row   = i + rstart;
2591     PetscInt        ncols = ii[i + 1] - ii[i];
2592     const PetscInt *icols = jj + ii[i];
2593     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2594       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2595       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2596     } else { /* block ordering does not match so we can only insert one block at a time. */
2597       PetscInt j;
2598       for (j = 0; j < ncols; j++) {
2599         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2600         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2601       }
2602     }
2603   }
2604 
2605   if (!V) PetscCall(PetscFree(values));
2606   nooffprocentries    = B->nooffprocentries;
2607   B->nooffprocentries = PETSC_TRUE;
2608   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2609   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2610   B->nooffprocentries = nooffprocentries;
2611 
2612   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 /*@C
2617   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2618 
2619   Collective
2620 
2621   Input Parameters:
2622 + B  - the matrix
2623 . bs - the block size
2624 . i  - the indices into `j` for the start of each local row (starts with zero)
2625 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2626 - v  - optional values in the matrix, use `NULL` if not provided
2627 
2628   Level: advanced
2629 
2630   Notes:
2631   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2632   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2633   called this routine.
2634 
2635   The order of the entries in `v` is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2636   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2637   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2638   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2639   block and the second index is over columns within a block.
2640 
2641   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
2642 
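  Example Usage:
  A minimal sketch for one MPI rank owning two block rows with bs = 2; the sizes, indices, and values are
  illustrative assumptions, not requirements of this routine.
.vb
  Mat         B;
  PetscInt    bs   = 2;
  PetscInt    ii[] = {0, 1, 3}; /* two local block rows with 1 and 2 nonzero blocks */
  PetscInt    jj[] = {0, 0, 1}; /* sorted block column indices for each block row */
  PetscScalar vv[3 * 2 * 2] = {0}; /* bs*bs values for each of the 3 nonzero blocks */

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(B, MATMPIBAIJ);
  MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, vv);
.ve
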
2643 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2644 @*/
2645 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2646 {
2647   PetscFunctionBegin;
2648   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2649   PetscValidType(B, 1);
2650   PetscValidLogicalCollectiveInt(B, bs, 2);
2651   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2652   PetscFunctionReturn(PETSC_SUCCESS);
2653 }
2654 
2655 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2656 {
2657   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2658   PetscInt     i;
2659   PetscMPIInt  size;
2660 
2661   PetscFunctionBegin;
2662   if (B->hash_active) {
2663     B->ops[0]      = b->cops;
2664     B->hash_active = PETSC_FALSE;
2665   }
2666   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2667   PetscCall(MatSetBlockSize(B, bs));
2668   PetscCall(PetscLayoutSetUp(B->rmap));
2669   PetscCall(PetscLayoutSetUp(B->cmap));
2670   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2671 
2672   if (d_nnz) {
2673     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2674   }
2675   if (o_nnz) {
2676     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2677   }
2678 
2679   b->bs2 = bs * bs;
2680   b->mbs = B->rmap->n / bs;
2681   b->nbs = B->cmap->n / bs;
2682   b->Mbs = B->rmap->N / bs;
2683   b->Nbs = B->cmap->N / bs;
2684 
2685   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2686   b->rstartbs = B->rmap->rstart / bs;
2687   b->rendbs   = B->rmap->rend / bs;
2688   b->cstartbs = B->cmap->rstart / bs;
2689   b->cendbs   = B->cmap->rend / bs;
2690 
2691 #if defined(PETSC_USE_CTABLE)
2692   PetscCall(PetscHMapIDestroy(&b->colmap));
2693 #else
2694   PetscCall(PetscFree(b->colmap));
2695 #endif
2696   PetscCall(PetscFree(b->garray));
2697   PetscCall(VecDestroy(&b->lvec));
2698   PetscCall(VecScatterDestroy(&b->Mvctx));
2699 
2700   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2701 
2702   MatSeqXAIJGetOptions_Private(b->B);
2703   PetscCall(MatDestroy(&b->B));
2704   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2705   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2706   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2707   MatSeqXAIJRestoreOptions_Private(b->B);
2708 
2709   MatSeqXAIJGetOptions_Private(b->A);
2710   PetscCall(MatDestroy(&b->A));
2711   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2712   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2713   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2714   MatSeqXAIJRestoreOptions_Private(b->A);
2715 
2716   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2717   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2718   B->preallocated  = PETSC_TRUE;
2719   B->was_assembled = PETSC_FALSE;
2720   B->assembled     = PETSC_FALSE;
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2725 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2726 
2727 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2728 {
2729   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2730   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2731   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2732   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2733 
2734   PetscFunctionBegin;
2735   PetscCall(PetscMalloc1(M + 1, &ii));
2736   ii[0] = 0;
2737   for (i = 0; i < M; i++) {
2738     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2739     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2740     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2741     /* remove one from the count if this row contains a diagonal entry */
2742     for (j = id[i]; j < id[i + 1]; j++) {
2743       if (jd[j] == i) {
2744         ii[i + 1]--;
2745         break;
2746       }
2747     }
2748   }
2749   PetscCall(PetscMalloc1(ii[M], &jj));
2750   cnt = 0;
2751   for (i = 0; i < M; i++) {
2752     for (j = io[i]; j < io[i + 1]; j++) {
2753       if (garray[jo[j]] > rstart) break;
2754       jj[cnt++] = garray[jo[j]];
2755     }
2756     for (k = id[i]; k < id[i + 1]; k++) {
2757       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2758     }
2759     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2760   }
2761   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2762   PetscFunctionReturn(PETSC_SUCCESS);
2763 }
2764 
2765 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2766 
2767 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2768 
2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2770 {
2771   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2772   Mat_MPIAIJ  *b;
2773   Mat          B;
2774 
2775   PetscFunctionBegin;
2776   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2777 
2778   if (reuse == MAT_REUSE_MATRIX) {
2779     B = *newmat;
2780   } else {
2781     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2782     PetscCall(MatSetType(B, MATMPIAIJ));
2783     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2784     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2785     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2786     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2787   }
2788   b = (Mat_MPIAIJ *)B->data;
2789 
2790   if (reuse == MAT_REUSE_MATRIX) {
2791     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2792     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2793   } else {
2794     PetscInt   *garray = a->garray;
2795     Mat_SeqAIJ *bB;
2796     PetscInt    bs, nnz;
2797     PetscCall(MatDestroy(&b->A));
2798     PetscCall(MatDestroy(&b->B));
2799     /* just clear out the data structure */
2800     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2801     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2802     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2803 
2804     /* Global numbering for b->B columns */
2805     bB  = (Mat_SeqAIJ *)b->B->data;
2806     bs  = A->rmap->bs;
2807     nnz = bB->i[A->rmap->n];
2808     for (PetscInt k = 0; k < nnz; k++) {
2809       PetscInt bj = bB->j[k] / bs;
2810       PetscInt br = bB->j[k] % bs;
2811       bB->j[k]    = garray[bj] * bs + br;
2812     }
2813   }
2814   PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2815   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2816   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2817   PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
2818 
2819   if (reuse == MAT_INPLACE_MATRIX) {
2820     PetscCall(MatHeaderReplace(A, &B));
2821   } else {
2822     *newmat = B;
2823   }
2824   PetscFunctionReturn(PETSC_SUCCESS);
2825 }
2826 
2827 /*MC
2828    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2829 
2830    Options Database Keys:
2831 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2832 . -mat_block_size <bs> - set the blocksize used to store the matrix
2833 . -mat_baij_mult_version version - indicates the version of the matrix-vector product to use (0 often indicates using BLAS)
2834 - -mat_use_hash_table <fact> - set hash table factor
2835 
2836    Level: beginner
2837 
2838    Note:
2839     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2840     space is allocated for the nonzero entries, and any entries passed with `MatSetValues()` are ignored.
2841 
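   Example Usage:
   A sketch of selecting this type at run time; the sizes m, n, M, N are illustrative placeholders, and the run
   would pass -mat_type mpibaij (and optionally -mat_block_size <bs>) on the command line.
.vb
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetFromOptions(A);
.ve
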
2842 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2843 M*/
2844 
2845 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2846 
2847 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2848 {
2849   Mat_MPIBAIJ *b;
2850   PetscBool    flg = PETSC_FALSE;
2851 
2852   PetscFunctionBegin;
2853   PetscCall(PetscNew(&b));
2854   B->data      = (void *)b;
2855   B->ops[0]    = MatOps_Values;
2856   B->assembled = PETSC_FALSE;
2857 
2858   B->insertmode = NOT_SET_VALUES;
2859   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2860   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2861 
2862   /* build local table of row and column ownerships */
2863   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2864 
2865   /* build cache for off array entries formed */
2866   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2867 
2868   b->donotstash  = PETSC_FALSE;
2869   b->colmap      = NULL;
2870   b->garray      = NULL;
2871   b->roworiented = PETSC_TRUE;
2872 
2873   /* stuff used in block assembly */
2874   b->barray = NULL;
2875 
2876   /* stuff used for matrix vector multiply */
2877   b->lvec  = NULL;
2878   b->Mvctx = NULL;
2879 
2880   /* stuff for MatGetRow() */
2881   b->rowindices   = NULL;
2882   b->rowvalues    = NULL;
2883   b->getrowactive = PETSC_FALSE;
2884 
2885   /* hash table stuff */
2886   b->ht           = NULL;
2887   b->hd           = NULL;
2888   b->ht_size      = 0;
2889   b->ht_flag      = PETSC_FALSE;
2890   b->ht_fact      = 0;
2891   b->ht_total_ct  = 0;
2892   b->ht_insert_ct = 0;
2893 
2894   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2895   b->ijonly = PETSC_FALSE;
2896 
2897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2900 #if defined(PETSC_HAVE_HYPRE)
2901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2902 #endif
2903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2910   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2911 
2912   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2913   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2914   if (flg) {
2915     PetscReal fact = 1.39;
2916     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2917     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2918     if (fact <= 1.0) fact = 1.39;
2919     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2920     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2921   }
2922   PetscOptionsEnd();
2923   PetscFunctionReturn(PETSC_SUCCESS);
2924 }
2925 
2926 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2927 /*MC
2928    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2929 
2930    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2931    and `MATMPIBAIJ` otherwise.
2932 
2933    Options Database Keys:
2934 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2935 
2936   Level: beginner
2937 
2938 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2939 M*/
2940 
2941 /*@
2942   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2943   (block compressed row).
2944 
2945   Collective
2946 
2947   Input Parameters:
2948 + B     - the matrix
2949 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2950           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2951 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2952            submatrix  (same for all local rows)
2953 . d_nnz - array containing the number of block nonzeros in the various block rows
2954            of the diagonal portion of the local submatrix (possibly different for each block
2955            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2956            set it even if it is zero.
2957 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2958            submatrix (same for all local rows).
2959 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2960            off-diagonal portion of the local submatrix (possibly different for
2961            each block row) or `NULL`.
2962 
2963    If the *_nnz parameter is given then the *_nz parameter is ignored
2964 
2965   Options Database Keys:
2966 + -mat_block_size            - size of the blocks to use
2967 - -mat_use_hash_table <fact> - set hash table factor
2968 
2969   Level: intermediate
2970 
2971   Notes:
2972   For good matrix assembly performance
2973   the user should preallocate the matrix storage by setting the parameters
2974   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2975   performance can be increased by more than a factor of 50.
2976 
2977   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor,
2978   then it must be used on all processors that share the object for that argument.
2979 
2980   Storage Information:
2981   For a square global matrix we define each processor's diagonal portion
2982   to be its local rows and the corresponding columns (a square submatrix);
2983   each processor's off-diagonal portion encompasses the remainder of the
2984   local matrix (a rectangular submatrix).
2985 
2986   The user can specify preallocated storage for the diagonal part of
2987   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2988   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2989   memory allocation.  Likewise, specify preallocated storage for the
2990   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2991 
2992   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2993   the figure below we depict these three local rows and all columns (0-11).
2994 
2995 .vb
2996            0 1 2 3 4 5 6 7 8 9 10 11
2997           --------------------------
2998    row 3  |o o o d d d o o o o  o  o
2999    row 4  |o o o d d d o o o o  o  o
3000    row 5  |o o o d d d o o o o  o  o
3001           --------------------------
3002 .ve
3003 
3004   Thus, any entries in the d locations are stored in the d (diagonal)
3005   submatrix, and any entries in the o locations are stored in the
3006   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3007   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3008 
3009   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3010   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3011   In general, for PDE problems in which most nonzeros are near the diagonal,
3012   one expects `d_nz` >> `o_nz`.
3013 
3014   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3015   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3016   You can also run with the option `-info` and look for messages with the string
3017   malloc in them to see if additional memory allocation was needed.
3018 
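  Example Usage:
  A minimal sketch for the three local rows depicted above with bs = 1; the counts are read off the figure and
  are illustrative only.
.vb
  PetscInt d_nnz[] = {3, 3, 3}; /* 3 block nonzeros in the d part of each local block row */
  PetscInt o_nnz[] = {9, 9, 9}; /* 9 block nonzeros in the o part of each local block row */

  MatMPIBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve
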
3019 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3020 @*/
3021 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3022 {
3023   PetscFunctionBegin;
3024   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3025   PetscValidType(B, 1);
3026   PetscValidLogicalCollectiveInt(B, bs, 2);
3027   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3028   PetscFunctionReturn(PETSC_SUCCESS);
3029 }
3030 
3031 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3032 /*@
3033   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3034   (block compressed row).
3035 
3036   Collective
3037 
3038   Input Parameters:
3039 + comm  - MPI communicator
3040 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
3041           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3042 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3043           This value should be the same as the local size used in creating the
3044           y vector for the matrix-vector product y = Ax.
3045 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3046           This value should be the same as the local size used in creating the
3047           x vector for the matrix-vector product y = Ax.
3048 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3049 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3050 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3051           submatrix  (same for all local rows)
3052 . d_nnz - array containing the number of nonzero blocks in the various block rows
3053           of the diagonal portion of the local submatrix (possibly different for each block
3054           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3055           and set it even if it is zero.
3056 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3057           submatrix (same for all local rows).
3058 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3059           off-diagonal portion of the local submatrix (possibly different for
3060           each block row) or NULL.
3061 
3062   Output Parameter:
3063 . A - the matrix
3064 
3065   Options Database Keys:
3066 + -mat_block_size            - size of the blocks to use
3067 - -mat_use_hash_table <fact> - set hash table factor
3068 
3069   Level: intermediate
3070 
3071   Notes:
3072   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3073   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3074   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3075 
3076   For good matrix assembly performance
3077   the user should preallocate the matrix storage by setting the parameters
3078   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3079   performance can be increased by more than a factor of 50.
3080 
3081   If the *_nnz parameter is given then the *_nz parameter is ignored
3082 
3083   A nonzero block is any block that has 1 or more nonzeros in it
3084 
3085   The user MUST specify either the local or global matrix dimensions
3086   (possibly both).
3087 
3088   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor,
3089   then it must be used on all processors that share the object for that argument.
3090 
3091   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3092   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3093 
3094   Storage Information:
3095   For a square global matrix we define each processor's diagonal portion
3096   to be its local rows and the corresponding columns (a square submatrix);
3097   each processor's off-diagonal portion encompasses the remainder of the
3098   local matrix (a rectangular submatrix).
3099 
3100   The user can specify preallocated storage for the diagonal part of
3101   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3102   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3103   memory allocation.  Likewise, specify preallocated storage for the
3104   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3105 
3106   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3107   the figure below we depict these three local rows and all columns (0-11).
3108 
3109 .vb
3110            0 1 2 3 4 5 6 7 8 9 10 11
3111           --------------------------
3112    row 3  |o o o d d d o o o o  o  o
3113    row 4  |o o o d d d o o o o  o  o
3114    row 5  |o o o d d d o o o o  o  o
3115           --------------------------
3116 .ve
3117 
3118   Thus, any entries in the d locations are stored in the d (diagonal)
3119   submatrix, and any entries in the o locations are stored in the
3120   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3121   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3122 
3123   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3124   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3125   In general, for PDE problems in which most nonzeros are near the diagonal,
3126   one expects `d_nz` >> `o_nz`.
3127 
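  Example Usage:
  A sketch creating a square matrix with 2x2 blocks; the global size and preallocation counts are illustrative
  assumptions.
.vb
  Mat A;
  MatCreateBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A);
.ve
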
3128 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3129           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3130 @*/
3131 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3132 {
3133   PetscMPIInt size;
3134 
3135   PetscFunctionBegin;
3136   PetscCall(MatCreate(comm, A));
3137   PetscCall(MatSetSizes(*A, m, n, M, N));
3138   PetscCallMPI(MPI_Comm_size(comm, &size));
3139   if (size > 1) {
3140     PetscCall(MatSetType(*A, MATMPIBAIJ));
3141     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3142   } else {
3143     PetscCall(MatSetType(*A, MATSEQBAIJ));
3144     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3145   }
3146   PetscFunctionReturn(PETSC_SUCCESS);
3147 }
3148 
3149 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3150 {
3151   Mat          mat;
3152   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3153   PetscInt     len = 0;
3154 
3155   PetscFunctionBegin;
3156   *newmat = NULL;
3157   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3158   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3159   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3160 
3161   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3162   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3163   if (matin->hash_active) {
3164     PetscCall(MatSetUp(mat));
3165   } else {
3166     mat->factortype   = matin->factortype;
3167     mat->preallocated = PETSC_TRUE;
3168     mat->assembled    = PETSC_TRUE;
3169     mat->insertmode   = NOT_SET_VALUES;
3170 
3171     a             = (Mat_MPIBAIJ *)mat->data;
3172     mat->rmap->bs = matin->rmap->bs;
3173     a->bs2        = oldmat->bs2;
3174     a->mbs        = oldmat->mbs;
3175     a->nbs        = oldmat->nbs;
3176     a->Mbs        = oldmat->Mbs;
3177     a->Nbs        = oldmat->Nbs;
3178 
3179     a->size         = oldmat->size;
3180     a->rank         = oldmat->rank;
3181     a->donotstash   = oldmat->donotstash;
3182     a->roworiented  = oldmat->roworiented;
3183     a->rowindices   = NULL;
3184     a->rowvalues    = NULL;
3185     a->getrowactive = PETSC_FALSE;
3186     a->barray       = NULL;
3187     a->rstartbs     = oldmat->rstartbs;
3188     a->rendbs       = oldmat->rendbs;
3189     a->cstartbs     = oldmat->cstartbs;
3190     a->cendbs       = oldmat->cendbs;
3191 
3192     /* hash table stuff */
3193     a->ht           = NULL;
3194     a->hd           = NULL;
3195     a->ht_size      = 0;
3196     a->ht_flag      = oldmat->ht_flag;
3197     a->ht_fact      = oldmat->ht_fact;
3198     a->ht_total_ct  = 0;
3199     a->ht_insert_ct = 0;
3200 
3201     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3202     if (oldmat->colmap) {
3203 #if defined(PETSC_USE_CTABLE)
3204       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3205 #else
3206       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3207       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3208 #endif
3209     } else a->colmap = NULL;
3210 
3211     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3212       PetscCall(PetscMalloc1(len, &a->garray));
3213       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3214     } else a->garray = NULL;
3215 
3216     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3217     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3218     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3219 
3220     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3221     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3222   }
3223   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3224   *newmat = mat;
3225   PetscFunctionReturn(PETSC_SUCCESS);
3226 }
3227 
3228 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3229 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3230 {
3231   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3232   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3233   PetscScalar *matvals;
3234 
3235   PetscFunctionBegin;
3236   PetscCall(PetscViewerSetUp(viewer));
3237 
3238   /* read in matrix header */
3239   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3240   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3241   M  = header[1];
3242   N  = header[2];
3243   nz = header[3];
3244   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3245   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3246   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3247 
3248   /* set block sizes from the viewer's .info file */
3249   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3250   /* set local sizes if not set already */
3251   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3252   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3253   /* set global sizes if not set already */
3254   if (mat->rmap->N < 0) mat->rmap->N = M;
3255   if (mat->cmap->N < 0) mat->cmap->N = N;
3256   PetscCall(PetscLayoutSetUp(mat->rmap));
3257   PetscCall(PetscLayoutSetUp(mat->cmap));
3258 
3259   /* check if the matrix sizes are correct */
3260   PetscCall(MatGetSize(mat, &rows, &cols));
3261   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3262   PetscCall(MatGetBlockSize(mat, &bs));
3263   PetscCall(MatGetLocalSize(mat, &m, &n));
3264   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3265   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3266   mbs = m / bs;
3267   nbs = n / bs;
3268 
3269   /* read in row lengths and build row indices */
3270   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3271   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3272   rowidxs[0] = 0;
3273   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3274   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3275   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3276 
3277   /* read in column indices and matrix values */
3278   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3279   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3280   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3281 
3282   {                /* preallocate matrix storage */
3283     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3284     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3285     PetscBool  sbaij, done;
3286     PetscInt  *d_nnz, *o_nnz;
3287 
3288     PetscCall(PetscBTCreate(nbs, &bt));
3289     PetscCall(PetscHSetICreate(&ht));
3290     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3291     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3292     for (i = 0; i < mbs; i++) {
3293       PetscCall(PetscBTMemzero(nbs, bt));
3294       PetscCall(PetscHSetIClear(ht));
3295       for (k = 0; k < bs; k++) {
3296         PetscInt row = bs * i + k;
3297         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3298           PetscInt col = colidxs[j];
3299           if (!sbaij || col >= row) {
3300             if (col >= cs && col < ce) {
3301               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3302             } else {
3303               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3304               if (done) o_nnz[i]++;
3305             }
3306           }
3307         }
3308       }
3309     }
3310     PetscCall(PetscBTDestroy(&bt));
3311     PetscCall(PetscHSetIDestroy(&ht));
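    /* both preallocation calls are issued because this loader also serves MATMPISBAIJ (see the comment above the
       function); only the call matching the actual matrix type has an effect, the other PetscTryMethod() is a no-op */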
3312     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3313     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3314     PetscCall(PetscFree2(d_nnz, o_nnz));
3315   }
3316 
3317   /* store matrix values */
3318   for (i = 0; i < m; i++) {
3319     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3320     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3321   }
3322 
3323   PetscCall(PetscFree(rowidxs));
3324   PetscCall(PetscFree2(colidxs, matvals));
3325   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3326   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3327   PetscFunctionReturn(PETSC_SUCCESS);
3328 }
3329 
3330 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3331 {
3332   PetscBool isbinary;
3333 
3334   PetscFunctionBegin;
3335   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3336   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3337   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3338   PetscFunctionReturn(PETSC_SUCCESS);
3339 }
3340 
3341 /*@
3342   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3343 
3344   Input Parameters:
3345 + mat  - the matrix
3346 - fact - factor
3347 
3348   Options Database Key:
3349 . -mat_use_hash_table <fact> - provide the factor
3350 
3351   Level: advanced
3352 
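  Example Usage:
  A sketch; the factor value 1.6 is an illustrative assumption.
.vb
  MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE);
  MatMPIBAIJSetHashTableFactor(mat, 1.6);
.ve
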
3353 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3354 @*/
3355 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3356 {
3357   PetscFunctionBegin;
3358   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3359   PetscFunctionReturn(PETSC_SUCCESS);
3360 }
3361 
3362 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3363 {
3364   Mat_MPIBAIJ *baij;
3365 
3366   PetscFunctionBegin;
3367   baij          = (Mat_MPIBAIJ *)mat->data;
3368   baij->ht_fact = fact;
3369   PetscFunctionReturn(PETSC_SUCCESS);
3370 }
3371 
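/* Returns the diagonal (Ad) and off-diagonal (Ao) sequential blocks of a MATMPIBAIJ matrix, plus the map (colmap)
   from the block columns of Ao to global block columns */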
3372 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3373 {
3374   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3375   PetscBool    flg;
3376 
3377   PetscFunctionBegin;
3378   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3379   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3380   if (Ad) *Ad = a->A;
3381   if (Ao) *Ao = a->B;
3382   if (colmap) *colmap = a->garray;
3383   PetscFunctionReturn(PETSC_SUCCESS);
3384 }
3385 
3386 /*
3387     Special version for direct calls from Fortran (to eliminate two function call overheads)
3388 */
3389 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3390   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3391 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3392   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3393 #endif
3394 
3395 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3396 /*@C
3397   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3398 
3399   Collective
3400 
3401   Input Parameters:
3402 + matin  - the matrix
3403 . min    - number of input rows
3404 . im     - input rows
3405 . nin    - number of input columns
3406 . in     - input columns
3407 . v      - numerical values input
3408 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3409 
3410   Level: advanced
3411 
3412   Developer Notes:
3413   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3414 
3415 .seealso: `Mat`, `MatSetValuesBlocked()`
3416 @*/
3417 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3418 {
3419   /* convert input arguments to C version */
3420   Mat        mat = *matin;
3421   PetscInt   m = *min, n = *nin;
3422   InsertMode addv = *addvin;
3423 
3424   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3425   const MatScalar *value;
3426   MatScalar       *barray      = baij->barray;
3427   PetscBool        roworiented = baij->roworiented;
3428   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3429   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3430   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3431 
3432   PetscFunctionBegin;
3433   /* tasks normally handled by MatSetValuesBlocked() */
3434   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3435   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3436   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3437   if (mat->assembled) {
3438     mat->was_assembled = PETSC_TRUE;
3439     mat->assembled     = PETSC_FALSE;
3440   }
3441   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3442 
3443   if (!barray) {
3444     PetscCall(PetscMalloc1(bs2, &barray));
3445     baij->barray = barray;
3446   }
3447 
3448   if (roworiented) stepval = (n - 1) * bs;
3449   else stepval = (m - 1) * bs;
3450 
3451   for (i = 0; i < m; i++) {
3452     if (im[i] < 0) continue;
3453     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3454     if (im[i] >= rstart && im[i] < rend) {
3455       row = im[i] - rstart;
3456       for (j = 0; j < n; j++) {
3457         /* If NumCol = 1 then a copy is not required */
3458         if ((roworiented) && (n == 1)) {
3459           barray = (MatScalar *)v + i * bs2;
3460         } else if ((!roworiented) && (m == 1)) {
3461           barray = (MatScalar *)v + j * bs2;
3462         } else { /* Here a copy is required */
3463           if (roworiented) {
3464             value = v + i * (stepval + bs) * bs + j * bs;
3465           } else {
3466             value = v + j * (stepval + bs) * bs + i * bs;
3467           }
3468           for (ii = 0; ii < bs; ii++, value += stepval) {
3469             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3470           }
3471           barray -= bs2;
3472         }
3473 
3474         if (in[j] >= cstart && in[j] < cend) {
3475           col = in[j] - cstart;
3476           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3477         } else if (in[j] < 0) {
3478           continue;
3479         } else {
3480           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3481           if (mat->was_assembled) {
3482             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3483 
3484 #if defined(PETSC_USE_DEBUG)
3485   #if defined(PETSC_USE_CTABLE)
3486             {
3487               PetscInt data;
3488               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3489               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3490             }
3491   #else
3492             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3493   #endif
3494 #endif
3495 #if defined(PETSC_USE_CTABLE)
3496             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3497             col = (col - 1) / bs;
3498 #else
3499             col = (baij->colmap[in[j]] - 1) / bs;
3500 #endif
3501             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3502               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3503               col = in[j];
3504             }
3505           } else col = in[j];
3506           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3507         }
3508       }
3509     } else {
3510       if (!baij->donotstash) {
3511         if (roworiented) {
3512           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3513         } else {
3514           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3515         }
3516       }
3517     }
3518   }
3519 
3520   /* task normally handled by MatSetValuesBlocked() */
3521   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3522   PetscFunctionReturn(PETSC_SUCCESS);
3523 }
3524 
3525 /*@
3526   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3527 
3528   Collective
3529 
3530   Input Parameters:
3531 + comm - MPI communicator
3532 . bs   - the block size, only a block size of 1 is supported
3533 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3534 . n    - This value should be the same as the local size used in creating the
3535          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3536          calculated if `N` is given) For square matrices `n` is almost always `m`.
3537 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3538 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3539 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of nonzero blocks in block row row-1 of the matrix
3540 . j    - column indices
3541 - a    - matrix values
3542 
3543   Output Parameter:
3544 . mat - the matrix
3545 
3546   Level: intermediate
3547 
3548   Notes:
3549   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3550   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3551   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3552 
3553   The order of the entries in `a` is the same as in the block compressed sparse row storage format; that is, it is
3554   the same as a three-dimensional Fortran array values(bs,bs,nnz) that contains the first column of the first
3555   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3556   with column-major ordering within blocks.
3557 
3558   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3559 
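  Example Usage:
  A sketch with bs = 1 and one process owning two rows; the indices and values are illustrative assumptions.
.vb
  PetscInt    i[] = {0, 2, 3};
  PetscInt    j[] = {0, 1, 1};
  PetscScalar a[] = {1.0, 2.0, 3.0};
  Mat         A;

  MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve
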
3560 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3561           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3562 @*/
3563 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3564 {
3565   PetscFunctionBegin;
3566   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3567   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
3568   PetscCall(MatCreate(comm, mat));
3569   PetscCall(MatSetSizes(*mat, m, n, M, N));
3570   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3571   PetscCall(MatSetBlockSize(*mat, bs));
3572   PetscCall(MatSetUp(*mat));
3573   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3574   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3575   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3576   PetscFunctionReturn(PETSC_SUCCESS);
3577 }
3578 
3579 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3580 {
3581   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3582   PetscInt    *indx;
3583   PetscScalar *values;
3584 
3585   PetscFunctionBegin;
3586   PetscCall(MatGetSize(inmat, &m, &N));
3587   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3588     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3589     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3590     PetscInt    *bindx, rmax = a->rmax, j;
3591     PetscMPIInt  rank, size;
3592 
3593     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3594     mbs = m / bs;
3595     Nbs = N / cbs;
3596     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3597     nbs = n / cbs;
3598 
3599     PetscCall(PetscMalloc1(rmax, &bindx));
3600     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3601 
3602     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3603     PetscCallMPI(MPI_Comm_size(comm, &size));
3604     if (rank == size - 1) {
3605       /* Check sum(nbs) = Nbs */
3606       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3607     }
3608 
3609     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3610     for (i = 0; i < mbs; i++) {
3611       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3612       nnz = nnz / bs;
3613       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3614       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3615       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3616     }
3617     PetscCall(PetscFree(bindx));
3618 
3619     PetscCall(MatCreate(comm, outmat));
3620     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3621     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3622     PetscCall(MatSetType(*outmat, MATBAIJ));
3623     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3624     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3625     MatPreallocateEnd(dnz, onz);
3626     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3627   }
3628 
3629   /* numeric phase */
3630   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3631   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3632 
3633   for (i = 0; i < m; i++) {
3634     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3635     Ii = i + rstart;
3636     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3637     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3638   }
3639   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3640   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3641   PetscFunctionReturn(PETSC_SUCCESS);
3642 }
3643