xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 966bd95a39c2334d2e2ce17ad22128f3c1861eeb)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *vv;
62   Vec                vB, vA;
63   const PetscScalar *va, *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayRead(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(v, &vv));
92   PetscCall(VecRestoreArrayRead(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161   do { \
162     brow = row / bs; \
163     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
164     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165     rmax = aimax[brow]; \
166     nrow = ailen[brow]; \
167     bcol = col / bs; \
168     ridx = row % bs; \
169     cidx = col % bs; \
170     low  = 0; \
171     high = nrow; \
172     while (high - low > 3) { \
173       t = (low + high) / 2; \
174       if (rp[t] > bcol) high = t; \
175       else low = t; \
176     } \
177     for (_i = low; _i < high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2 * _i + bs * cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value; \
182         else *bap = value; \
183         goto a_noinsert; \
184       } \
185     } \
186     if (a->nonew == 1) goto a_noinsert; \
187     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189     N = nrow++ - 1; \
190     /* shift up all the later entries in this row */ \
191     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194     rp[_i]                          = bcol; \
195     ap[bs2 * _i + bs * cidx + ridx] = value; \
196   a_noinsert:; \
197     ailen[brow] = nrow; \
198   } while (0)
199 
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201   do { \
202     brow = row / bs; \
203     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
204     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205     rmax = bimax[brow]; \
206     nrow = bilen[brow]; \
207     bcol = col / bs; \
208     ridx = row % bs; \
209     cidx = col % bs; \
210     low  = 0; \
211     high = nrow; \
212     while (high - low > 3) { \
213       t = (low + high) / 2; \
214       if (rp[t] > bcol) high = t; \
215       else low = t; \
216     } \
217     for (_i = low; _i < high; _i++) { \
218       if (rp[_i] > bcol) break; \
219       if (rp[_i] == bcol) { \
220         bap = ap + bs2 * _i + bs * cidx + ridx; \
221         if (addv == ADD_VALUES) *bap += value; \
222         else *bap = value; \
223         goto b_noinsert; \
224       } \
225     } \
226     if (b->nonew == 1) goto b_noinsert; \
227     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229     N = nrow++ - 1; \
230     /* shift up all the later entries in this row */ \
231     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234     rp[_i]                          = bcol; \
235     ap[bs2 * _i + bs * cidx + ridx] = value; \
236   b_noinsert:; \
237     bilen[brow] = nrow; \
238   } while (0)
239 
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243   MatScalar    value;
244   PetscBool    roworiented = baij->roworiented;
245   PetscInt     i, j, row, col;
246   PetscInt     rstart_orig = mat->rmap->rstart;
247   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249 
250   /* Some Variables required in the macro */
251   Mat          A     = baij->A;
252   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
253   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254   MatScalar   *aa = a->a;
255 
256   Mat          B     = baij->B;
257   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
258   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259   MatScalar   *ba = b->a;
260 
261   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
262   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
263   MatScalar *ap, *bap;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < m; i++) {
267     if (im[i] < 0) continue;
268     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269     if (im[i] >= rstart_orig && im[i] < rend_orig) {
270       row = im[i] - rstart_orig;
271       for (j = 0; j < n; j++) {
272         if (in[j] >= cstart_orig && in[j] < cend_orig) {
273           col = in[j] - cstart_orig;
274           if (roworiented) value = v[i * n + j];
275           else value = v[i + j * m];
276           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277         } else if (in[j] < 0) {
278           continue;
279         } else {
280           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281           if (mat->was_assembled) {
282             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285             col = col - 1;
286 #else
287             col = baij->colmap[in[j] / bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290               PetscCall(MatDisAssemble_MPIBAIJ(mat));
291               col = in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B     = baij->B;
294               b     = (Mat_SeqBAIJ *)B->data;
295               bimax = b->imax;
296               bi    = b->i;
297               bilen = b->ilen;
298               bj    = b->j;
299               ba    = b->a;
300             } else {
301               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302               col += in[j] % bs;
303             }
304           } else col = in[j];
305           if (roworiented) value = v[i * n + j];
306           else value = v[i + j * m];
307           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309         }
310       }
311     } else {
312       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
313       if (!baij->donotstash) {
314         mat->assembled = PETSC_FALSE;
315         if (roworiented) {
316           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317         } else {
318           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319         }
320       }
321     }
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
410   const PetscScalar *value;
411   MatScalar         *barray      = baij->barray;
412   PetscBool          roworiented = baij->roworiented;
413   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
414   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416 
417   PetscFunctionBegin;
418   if (!barray) {
419     PetscCall(PetscMalloc1(bs2, &barray));
420     baij->barray = barray;
421   }
422 
423   if (roworiented) stepval = (n - 1) * bs;
424   else stepval = (m - 1) * bs;
425 
426   for (i = 0; i < m; i++) {
427     if (im[i] < 0) continue;
428     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429     if (im[i] >= rstart && im[i] < rend) {
430       row = im[i] - rstart;
431       for (j = 0; j < n; j++) {
432         /* If NumCol = 1 then a copy is not required */
433         if ((roworiented) && (n == 1)) {
434           barray = (MatScalar *)v + i * bs2;
435         } else if ((!roworiented) && (m == 1)) {
436           barray = (MatScalar *)v + j * bs2;
437         } else { /* Here a copy is required */
438           if (roworiented) {
439             value = v + (i * (stepval + bs) + j) * bs;
440           } else {
441             value = v + (j * (stepval + bs) + i) * bs;
442           }
443           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445             barray += bs;
446           }
447           barray -= bs2;
448         }
449 
450         if (in[j] >= cstart && in[j] < cend) {
451           col = in[j] - cstart;
452           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453         } else if (in[j] < 0) {
454           continue;
455         } else {
456           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457           if (mat->was_assembled) {
458             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459 
460 #if defined(PETSC_USE_CTABLE)
461             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
462             col = col < 1 ? -1 : (col - 1) / bs;
463 #else
464             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
465 #endif
466             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
467               PetscCall(MatDisAssemble_MPIBAIJ(mat));
468               col = in[j];
469             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
470           } else col = in[j];
471           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
472         }
473       }
474     } else {
475       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
476       if (!baij->donotstash) {
477         if (roworiented) {
478           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
479         } else {
480           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 #define HASH_KEY             0.6180339887
489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
495   PetscBool    roworiented = baij->roworiented;
496   PetscInt     i, j, row, col;
497   PetscInt     rstart_orig = mat->rmap->rstart;
498   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500   PetscReal    tmp;
501   MatScalar  **HD       = baij->hd, value;
502   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503 
504   PetscFunctionBegin;
505   for (i = 0; i < m; i++) {
506     if (PetscDefined(USE_DEBUG)) {
507       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509     }
510     row = im[i];
511     if (row >= rstart_orig && row < rend_orig) {
512       for (j = 0; j < n; j++) {
513         col = in[j];
514         if (roworiented) value = v[i * n + j];
515         else value = v[i + j * m];
516         /* Look up PetscInto the Hash Table */
517         key = (row / bs) * Nbs + (col / bs) + 1;
518         h1  = HASH(size, key, tmp);
519 
520         idx = h1;
521         if (PetscDefined(USE_DEBUG)) {
522           insert_ct++;
523           total_ct++;
524           if (HT[idx] != key) {
525             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526             if (idx == size) {
527               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529             }
530           }
531         } else if (HT[idx] != key) {
532           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533           if (idx == size) {
534             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
653 {
654   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
655   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
656   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
657 
658   PetscFunctionBegin;
659   for (i = 0; i < m; i++) {
660     if (idxm[i] < 0) continue; /* negative row */
661     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
662     PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
663     row = idxm[i] - bsrstart;
664     for (j = 0; j < n; j++) {
665       if (idxn[j] < 0) continue; /* negative column */
666       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
667       if (idxn[j] >= bscstart && idxn[j] < bscend) {
668         col = idxn[j] - bscstart;
669         PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
670       } else {
671         if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
672 #if defined(PETSC_USE_CTABLE)
673         PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
674         data--;
675 #else
676         data = baij->colmap[idxn[j] / bs] - 1;
677 #endif
678         if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
679         else {
680           col = data + idxn[j] % bs;
681           PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
682         }
683       }
684     }
685   }
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
690 {
691   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
692   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
693   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
694   PetscReal    sum = 0.0;
695   MatScalar   *v;
696 
697   PetscFunctionBegin;
698   if (baij->size == 1) {
699     PetscCall(MatNorm(baij->A, type, nrm));
700   } else {
701     if (type == NORM_FROBENIUS) {
702       v  = amat->a;
703       nz = amat->nz * bs2;
704       for (i = 0; i < nz; i++) {
705         sum += PetscRealPart(PetscConj(*v) * (*v));
706         v++;
707       }
708       v  = bmat->a;
709       nz = bmat->nz * bs2;
710       for (i = 0; i < nz; i++) {
711         sum += PetscRealPart(PetscConj(*v) * (*v));
712         v++;
713       }
714       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
715       *nrm = PetscSqrtReal(*nrm);
716     } else if (type == NORM_1) { /* max column sum */
717       PetscReal *tmp;
718       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
719 
720       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
721       v  = amat->a;
722       jj = amat->j;
723       for (i = 0; i < amat->nz; i++) {
724         for (j = 0; j < bs; j++) {
725           col = bs * (cstart + *jj) + j; /* column index */
726           for (row = 0; row < bs; row++) {
727             tmp[col] += PetscAbsScalar(*v);
728             v++;
729           }
730         }
731         jj++;
732       }
733       v  = bmat->a;
734       jj = bmat->j;
735       for (i = 0; i < bmat->nz; i++) {
736         for (j = 0; j < bs; j++) {
737           col = bs * garray[*jj] + j;
738           for (row = 0; row < bs; row++) {
739             tmp[col] += PetscAbsScalar(*v);
740             v++;
741           }
742         }
743         jj++;
744       }
745       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
746       *nrm = 0.0;
747       for (j = 0; j < mat->cmap->N; j++) {
748         if (tmp[j] > *nrm) *nrm = tmp[j];
749       }
750       PetscCall(PetscFree(tmp));
751     } else if (type == NORM_INFINITY) { /* max row sum */
752       PetscReal *sums;
753       PetscCall(PetscMalloc1(bs, &sums));
754       sum = 0.0;
755       for (j = 0; j < amat->mbs; j++) {
756         for (row = 0; row < bs; row++) sums[row] = 0.0;
757         v  = amat->a + bs2 * amat->i[j];
758         nz = amat->i[j + 1] - amat->i[j];
759         for (i = 0; i < nz; i++) {
760           for (col = 0; col < bs; col++) {
761             for (row = 0; row < bs; row++) {
762               sums[row] += PetscAbsScalar(*v);
763               v++;
764             }
765           }
766         }
767         v  = bmat->a + bs2 * bmat->i[j];
768         nz = bmat->i[j + 1] - bmat->i[j];
769         for (i = 0; i < nz; i++) {
770           for (col = 0; col < bs; col++) {
771             for (row = 0; row < bs; row++) {
772               sums[row] += PetscAbsScalar(*v);
773               v++;
774             }
775           }
776         }
777         for (row = 0; row < bs; row++) {
778           if (sums[row] > sum) sum = sums[row];
779         }
780       }
781       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
782       PetscCall(PetscFree(sums));
783     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
784   }
785   PetscFunctionReturn(PETSC_SUCCESS);
786 }
787 
788 /*
789   Creates the hash table, and sets the table
790   This table is created only once.
791   If new entries need to be added to the matrix
792   then the hash table has to be destroyed and
793   recreated.
794 */
795 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
796 {
797   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
798   Mat          A = baij->A, B = baij->B;
799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
800   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
801   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
802   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
803   PetscInt    *HT, key;
804   MatScalar  **HD;
805   PetscReal    tmp;
806 #if defined(PETSC_USE_INFO)
807   PetscInt ct = 0, max = 0;
808 #endif
809 
810   PetscFunctionBegin;
811   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
812 
813   baij->ht_size = (PetscInt)(factor * nz);
814   ht_size       = baij->ht_size;
815 
816   /* Allocate Memory for Hash Table */
817   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
818   HD = baij->hd;
819   HT = baij->ht;
820 
821   /* Loop Over A */
822   for (i = 0; i < a->mbs; i++) {
823     for (j = ai[i]; j < ai[i + 1]; j++) {
824       row = i + rstart;
825       col = aj[j] + cstart;
826 
827       key = row * Nbs + col + 1;
828       h1  = HASH(ht_size, key, tmp);
829       for (k = 0; k < ht_size; k++) {
830         if (!HT[(h1 + k) % ht_size]) {
831           HT[(h1 + k) % ht_size] = key;
832           HD[(h1 + k) % ht_size] = a->a + j * bs2;
833           break;
834 #if defined(PETSC_USE_INFO)
835         } else {
836           ct++;
837 #endif
838         }
839       }
840 #if defined(PETSC_USE_INFO)
841       if (k > max) max = k;
842 #endif
843     }
844   }
845   /* Loop Over B */
846   for (i = 0; i < b->mbs; i++) {
847     for (j = bi[i]; j < bi[i + 1]; j++) {
848       row = i + rstart;
849       col = garray[bj[j]];
850       key = row * Nbs + col + 1;
851       h1  = HASH(ht_size, key, tmp);
852       for (k = 0; k < ht_size; k++) {
853         if (!HT[(h1 + k) % ht_size]) {
854           HT[(h1 + k) % ht_size] = key;
855           HD[(h1 + k) % ht_size] = b->a + j * bs2;
856           break;
857 #if defined(PETSC_USE_INFO)
858         } else {
859           ct++;
860 #endif
861         }
862       }
863 #if defined(PETSC_USE_INFO)
864       if (k > max) max = k;
865 #endif
866     }
867   }
868 
869   /* Print Summary */
870 #if defined(PETSC_USE_INFO)
871   for (i = 0, j = 0; i < ht_size; i++) {
872     if (HT[i]) j++;
873   }
874   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
875 #endif
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
880 {
881   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
882   PetscInt     nstash, reallocs;
883 
884   PetscFunctionBegin;
885   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
886 
887   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
888   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
889   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
890   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
891   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
892   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
896 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
897 {
898   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
899   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
900   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
901   PetscInt    *row, *col;
902   PetscBool    r1, r2, r3, all_assembled;
903   MatScalar   *val;
904   PetscMPIInt  n;
905 
906   PetscFunctionBegin;
907   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
908   if (!baij->donotstash && !mat->nooffprocentries) {
909     while (1) {
910       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
911       if (!flg) break;
912 
913       for (i = 0; i < n;) {
914         /* Now identify the consecutive vals belonging to the same row */
915         for (j = i, rstart = row[j]; j < n; j++) {
916           if (row[j] != rstart) break;
917         }
918         if (j < n) ncols = j - i;
919         else ncols = n - i;
920         /* Now assemble all these values with a single function call */
921         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
922         i = j;
923       }
924     }
925     PetscCall(MatStashScatterEnd_Private(&mat->stash));
926     /* Now process the block-stash. Since the values are stashed column-oriented,
927        set the row-oriented flag to column-oriented, and after MatSetValues()
928        restore the original flags */
929     r1 = baij->roworiented;
930     r2 = a->roworiented;
931     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
932 
933     baij->roworiented                           = PETSC_FALSE;
934     a->roworiented                              = PETSC_FALSE;
935     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
936     while (1) {
937       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
938       if (!flg) break;
939 
940       for (i = 0; i < n;) {
941         /* Now identify the consecutive vals belonging to the same row */
942         for (j = i, rstart = row[j]; j < n; j++) {
943           if (row[j] != rstart) break;
944         }
945         if (j < n) ncols = j - i;
946         else ncols = n - i;
947         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
948         i = j;
949       }
950     }
951     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
952 
953     baij->roworiented                           = r1;
954     a->roworiented                              = r2;
955     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
956   }
957 
958   PetscCall(MatAssemblyBegin(baij->A, mode));
959   PetscCall(MatAssemblyEnd(baij->A, mode));
960 
961   /* determine if any process has disassembled, if so we must
962      also disassemble ourselves, in order that we may reassemble. */
963   /*
964      if nonzero structure of submatrix B cannot change then we know that
965      no process disassembled thus we can skip this stuff
966   */
967   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
968     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
969     if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
970   }
971 
972   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
973   PetscCall(MatAssemblyBegin(baij->B, mode));
974   PetscCall(MatAssemblyEnd(baij->B, mode));
975 
976 #if defined(PETSC_USE_INFO)
977   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
978     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
979 
980     baij->ht_total_ct  = 0;
981     baij->ht_insert_ct = 0;
982   }
983 #endif
984   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
985     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
986 
987     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
988     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
989   }
990 
991   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
992 
993   baij->rowvalues = NULL;
994 
995   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
996   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
997     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
998     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
999   }
1000   PetscFunctionReturn(PETSC_SUCCESS);
1001 }
1002 
1003 #include <petscdraw.h>
1004 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1005 {
1006   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1007   PetscMPIInt       rank = baij->rank;
1008   PetscInt          bs   = mat->rmap->bs;
1009   PetscBool         isascii, isdraw;
1010   PetscViewer       sviewer;
1011   PetscViewerFormat format;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1015   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1016   if (isascii) {
1017     PetscCall(PetscViewerGetFormat(viewer, &format));
1018     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1019       MatInfo info;
1020       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1021       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1022       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1023       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1024                                                    mat->rmap->bs, info.memory));
1025       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1026       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1027       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1028       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1029       PetscCall(PetscViewerFlush(viewer));
1030       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1031       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1032       PetscCall(VecScatterView(baij->Mvctx, viewer));
1033       PetscFunctionReturn(PETSC_SUCCESS);
1034     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1035       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1036       PetscFunctionReturn(PETSC_SUCCESS);
1037     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1038       PetscFunctionReturn(PETSC_SUCCESS);
1039     }
1040   }
1041 
1042   if (isdraw) {
1043     PetscDraw draw;
1044     PetscBool isnull;
1045     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1046     PetscCall(PetscDrawIsNull(draw, &isnull));
1047     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1048   }
1049 
1050   {
1051     /* assemble the entire matrix onto first processor. */
1052     Mat          A;
1053     Mat_SeqBAIJ *Aloc;
1054     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1055     MatScalar   *a;
1056     const char  *matname;
1057 
1058     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1059     /* Perhaps this should be the type of mat? */
1060     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1061     if (rank == 0) {
1062       PetscCall(MatSetSizes(A, M, N, M, N));
1063     } else {
1064       PetscCall(MatSetSizes(A, 0, 0, M, N));
1065     }
1066     PetscCall(MatSetType(A, MATMPIBAIJ));
1067     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1068     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1069 
1070     /* copy over the A part */
1071     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1072     ai   = Aloc->i;
1073     aj   = Aloc->j;
1074     a    = Aloc->a;
1075     PetscCall(PetscMalloc1(bs, &rvals));
1076 
1077     for (i = 0; i < mbs; i++) {
1078       rvals[0] = bs * (baij->rstartbs + i);
1079       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1080       for (j = ai[i]; j < ai[i + 1]; j++) {
1081         col = (baij->cstartbs + aj[j]) * bs;
1082         for (k = 0; k < bs; k++) {
1083           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1084           col++;
1085           a += bs;
1086         }
1087       }
1088     }
1089     /* copy over the B part */
1090     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1091     ai   = Aloc->i;
1092     aj   = Aloc->j;
1093     a    = Aloc->a;
1094     for (i = 0; i < mbs; i++) {
1095       rvals[0] = bs * (baij->rstartbs + i);
1096       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1097       for (j = ai[i]; j < ai[i + 1]; j++) {
1098         col = baij->garray[aj[j]] * bs;
1099         for (k = 0; k < bs; k++) {
1100           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1101           col++;
1102           a += bs;
1103         }
1104       }
1105     }
1106     PetscCall(PetscFree(rvals));
1107     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1108     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1109     /*
1110        Everyone has to call to draw the matrix since the graphics waits are
1111        synchronized across all processors that share the PetscDraw object
1112     */
1113     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1114     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1115     if (rank == 0) {
1116       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1117       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1118     }
1119     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1120     PetscCall(MatDestroy(&A));
1121   }
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1126 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1127 {
1128   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1129   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1130   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1131   const PetscInt *garray = aij->garray;
1132   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1133   PetscCount      nz, hnz;
1134   PetscInt       *rowlens, *colidxs;
1135   PetscScalar    *matvals;
1136   PetscMPIInt     rank;
1137 
1138   PetscFunctionBegin;
1139   PetscCall(PetscViewerSetUp(viewer));
1140 
1141   M  = mat->rmap->N;
1142   N  = mat->cmap->N;
1143   m  = mat->rmap->n;
1144   rs = mat->rmap->rstart;
1145   cs = mat->cmap->rstart;
1146   bs = mat->rmap->bs;
1147   nz = bs * bs * (A->nz + B->nz);
1148 
1149   /* write matrix header */
1150   header[0] = MAT_FILE_CLASSID;
1151   header[1] = M;
1152   header[2] = N;
1153   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1154   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1155   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1156   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1157 
1158   /* fill in and store row lengths */
1159   PetscCall(PetscMalloc1(m, &rowlens));
1160   for (cnt = 0, i = 0; i < A->mbs; i++)
1161     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1162   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1163   PetscCall(PetscFree(rowlens));
1164 
1165   /* fill in and store column indices */
1166   PetscCall(PetscMalloc1(nz, &colidxs));
1167   for (cnt = 0, i = 0; i < A->mbs; i++) {
1168     for (k = 0; k < bs; k++) {
1169       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1170         if (garray[B->j[jb]] > cs / bs) break;
1171         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1172       }
1173       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1174         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1175       for (; jb < B->i[i + 1]; jb++)
1176         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1177     }
1178   }
1179   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
1180   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1181   PetscCall(PetscFree(colidxs));
1182 
1183   /* fill in and store nonzero values */
1184   PetscCall(PetscMalloc1(nz, &matvals));
1185   for (cnt = 0, i = 0; i < A->mbs; i++) {
1186     for (k = 0; k < bs; k++) {
1187       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1188         if (garray[B->j[jb]] > cs / bs) break;
1189         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1190       }
1191       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1192         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1193       for (; jb < B->i[i + 1]; jb++)
1194         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1195     }
1196   }
1197   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1198   PetscCall(PetscFree(matvals));
1199 
1200   /* write block size option to the viewer's .info file */
1201   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1202   PetscFunctionReturn(PETSC_SUCCESS);
1203 }
1204 
1205 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1206 {
1207   PetscBool isascii, isdraw, issocket, isbinary;
1208 
1209   PetscFunctionBegin;
1210   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1211   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1212   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1213   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1214   if (isascii || isdraw || issocket) {
1215     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1216   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1217   PetscFunctionReturn(PETSC_SUCCESS);
1218 }
1219 
1220 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1221 {
1222   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1223   PetscInt     nt;
1224 
1225   PetscFunctionBegin;
1226   PetscCall(VecGetLocalSize(xx, &nt));
1227   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1228   PetscCall(VecGetLocalSize(yy, &nt));
1229   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1230   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1231   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1232   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1233   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1238 {
1239   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1240 
1241   PetscFunctionBegin;
1242   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1243   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1244   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1246   PetscFunctionReturn(PETSC_SUCCESS);
1247 }
1248 
1249 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1250 {
1251   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1252 
1253   PetscFunctionBegin;
1254   /* do nondiagonal part */
1255   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1256   /* do local part */
1257   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1258   /* add partial results together */
1259   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1260   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1261   PetscFunctionReturn(PETSC_SUCCESS);
1262 }
1263 
1264 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1265 {
1266   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1267 
1268   PetscFunctionBegin;
1269   /* do nondiagonal part */
1270   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1271   /* do local part */
1272   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1273   /* add partial results together */
1274   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1275   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280   This only works correctly for square matrices where the subblock A->A is the
1281    diagonal block
1282 */
1283 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1284 {
1285   PetscFunctionBegin;
1286   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1287   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 
1291 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1292 {
1293   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1294 
1295   PetscFunctionBegin;
1296   PetscCall(MatScale(a->A, aa));
1297   PetscCall(MatScale(a->B, aa));
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
1301 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1302 {
1303   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1304   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1305   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1306   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1307   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1308 
1309   PetscFunctionBegin;
1310   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1311   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1312   mat->getrowactive = PETSC_TRUE;
1313 
1314   if (!mat->rowvalues && (idx || v)) {
1315     /*
1316         allocate enough space to hold information from the longest row.
1317     */
1318     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1319     PetscInt     max = 1, mbs = mat->mbs, tmp;
1320     for (i = 0; i < mbs; i++) {
1321       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1322       if (max < tmp) max = tmp;
1323     }
1324     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1325   }
1326   lrow = row - brstart;
1327 
1328   pvA = &vworkA;
1329   pcA = &cworkA;
1330   pvB = &vworkB;
1331   pcB = &cworkB;
1332   if (!v) {
1333     pvA = NULL;
1334     pvB = NULL;
1335   }
1336   if (!idx) {
1337     pcA = NULL;
1338     if (!v) pcB = NULL;
1339   }
1340   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1341   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1342   nztot = nzA + nzB;
1343 
1344   cmap = mat->garray;
1345   if (v || idx) {
1346     if (nztot) {
1347       /* Sort by increasing column numbers, assuming A and B already sorted */
1348       PetscInt imark = -1;
1349       if (v) {
1350         *v = v_p = mat->rowvalues;
1351         for (i = 0; i < nzB; i++) {
1352           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1353           else break;
1354         }
1355         imark = i;
1356         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1357         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1358       }
1359       if (idx) {
1360         *idx = idx_p = mat->rowindices;
1361         if (imark > -1) {
1362           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1363         } else {
1364           for (i = 0; i < nzB; i++) {
1365             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1366             else break;
1367           }
1368           imark = i;
1369         }
1370         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1371         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1372       }
1373     } else {
1374       if (idx) *idx = NULL;
1375       if (v) *v = NULL;
1376     }
1377   }
1378   *nz = nztot;
1379   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1380   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1381   PetscFunctionReturn(PETSC_SUCCESS);
1382 }
1383 
1384 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1385 {
1386   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1387 
1388   PetscFunctionBegin;
1389   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1390   baij->getrowactive = PETSC_FALSE;
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
1394 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1395 {
1396   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1397 
1398   PetscFunctionBegin;
1399   PetscCall(MatZeroEntries(l->A));
1400   PetscCall(MatZeroEntries(l->B));
1401   PetscFunctionReturn(PETSC_SUCCESS);
1402 }
1403 
1404 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1405 {
1406   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1407   Mat            A = a->A, B = a->B;
1408   PetscLogDouble isend[5], irecv[5];
1409 
1410   PetscFunctionBegin;
1411   info->block_size = (PetscReal)matin->rmap->bs;
1412 
1413   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1414 
1415   isend[0] = info->nz_used;
1416   isend[1] = info->nz_allocated;
1417   isend[2] = info->nz_unneeded;
1418   isend[3] = info->memory;
1419   isend[4] = info->mallocs;
1420 
1421   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1422 
1423   isend[0] += info->nz_used;
1424   isend[1] += info->nz_allocated;
1425   isend[2] += info->nz_unneeded;
1426   isend[3] += info->memory;
1427   isend[4] += info->mallocs;
1428 
1429   if (flag == MAT_LOCAL) {
1430     info->nz_used      = isend[0];
1431     info->nz_allocated = isend[1];
1432     info->nz_unneeded  = isend[2];
1433     info->memory       = isend[3];
1434     info->mallocs      = isend[4];
1435   } else if (flag == MAT_GLOBAL_MAX) {
1436     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1437 
1438     info->nz_used      = irecv[0];
1439     info->nz_allocated = irecv[1];
1440     info->nz_unneeded  = irecv[2];
1441     info->memory       = irecv[3];
1442     info->mallocs      = irecv[4];
1443   } else if (flag == MAT_GLOBAL_SUM) {
1444     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1445 
1446     info->nz_used      = irecv[0];
1447     info->nz_allocated = irecv[1];
1448     info->nz_unneeded  = irecv[2];
1449     info->memory       = irecv[3];
1450     info->mallocs      = irecv[4];
1451   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1452   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1453   info->fill_ratio_needed = 0;
1454   info->factor_mallocs    = 0;
1455   PetscFunctionReturn(PETSC_SUCCESS);
1456 }
1457 
1458 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1459 {
1460   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1461 
1462   PetscFunctionBegin;
1463   switch (op) {
1464   case MAT_NEW_NONZERO_LOCATIONS:
1465   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1466   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1467   case MAT_KEEP_NONZERO_PATTERN:
1468   case MAT_NEW_NONZERO_LOCATION_ERR:
1469     MatCheckPreallocated(A, 1);
1470     PetscCall(MatSetOption(a->A, op, flg));
1471     PetscCall(MatSetOption(a->B, op, flg));
1472     break;
1473   case MAT_ROW_ORIENTED:
1474     MatCheckPreallocated(A, 1);
1475     a->roworiented = flg;
1476 
1477     PetscCall(MatSetOption(a->A, op, flg));
1478     PetscCall(MatSetOption(a->B, op, flg));
1479     break;
1480   case MAT_IGNORE_OFF_PROC_ENTRIES:
1481     a->donotstash = flg;
1482     break;
1483   case MAT_USE_HASH_TABLE:
1484     a->ht_flag = flg;
1485     a->ht_fact = 1.39;
1486     break;
1487   case MAT_SYMMETRIC:
1488   case MAT_STRUCTURALLY_SYMMETRIC:
1489   case MAT_HERMITIAN:
1490   case MAT_SYMMETRY_ETERNAL:
1491   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1492   case MAT_SPD_ETERNAL:
1493     /* if the diagonal matrix is square it inherits some of the properties above */
1494     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1495     break;
1496   default:
1497     break;
1498   }
1499   PetscFunctionReturn(PETSC_SUCCESS);
1500 }
1501 
1502 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1505   Mat_SeqBAIJ *Aloc;
1506   Mat          B;
1507   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1508   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1509   MatScalar   *a;
1510 
1511   PetscFunctionBegin;
1512   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1513   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1514     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1515     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1516     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1517     /* Do not know preallocation information, but must set block size */
1518     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1519   } else {
1520     B = *matout;
1521   }
1522 
1523   /* copy over the A part */
1524   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1525   ai   = Aloc->i;
1526   aj   = Aloc->j;
1527   a    = Aloc->a;
1528   PetscCall(PetscMalloc1(bs, &rvals));
1529 
1530   for (i = 0; i < mbs; i++) {
1531     rvals[0] = bs * (baij->rstartbs + i);
1532     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1533     for (j = ai[i]; j < ai[i + 1]; j++) {
1534       col = (baij->cstartbs + aj[j]) * bs;
1535       for (k = 0; k < bs; k++) {
1536         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1537 
1538         col++;
1539         a += bs;
1540       }
1541     }
1542   }
1543   /* copy over the B part */
1544   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1545   ai   = Aloc->i;
1546   aj   = Aloc->j;
1547   a    = Aloc->a;
1548   for (i = 0; i < mbs; i++) {
1549     rvals[0] = bs * (baij->rstartbs + i);
1550     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       col = baij->garray[aj[j]] * bs;
1553       for (k = 0; k < bs; k++) {
1554         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1555         col++;
1556         a += bs;
1557       }
1558     }
1559   }
1560   PetscCall(PetscFree(rvals));
1561   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1562   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1563 
1564   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1565   else PetscCall(MatHeaderMerge(A, &B));
1566   PetscFunctionReturn(PETSC_SUCCESS);
1567 }
1568 
1569 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1570 {
1571   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1572   Mat          a = baij->A, b = baij->B;
1573   PetscInt     s1, s2, s3;
1574 
1575   PetscFunctionBegin;
1576   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1577   if (rr) {
1578     PetscCall(VecGetLocalSize(rr, &s1));
1579     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1580     /* Overlap communication with computation. */
1581     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1582   }
1583   if (ll) {
1584     PetscCall(VecGetLocalSize(ll, &s1));
1585     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1586     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1587   }
1588   /* scale  the diagonal block */
1589   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1590 
1591   if (rr) {
1592     /* Do a scatter end and then right scale the off-diagonal block */
1593     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1594     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1595   }
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1600 {
1601   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1602   PetscInt    *lrows;
1603   PetscInt     r, len;
1604   PetscBool    cong;
1605 
1606   PetscFunctionBegin;
1607   /* get locally owned rows */
1608   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1609   /* fix right-hand side if needed */
1610   if (x && b) {
1611     const PetscScalar *xx;
1612     PetscScalar       *bb;
1613 
1614     PetscCall(VecGetArrayRead(x, &xx));
1615     PetscCall(VecGetArray(b, &bb));
1616     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1617     PetscCall(VecRestoreArrayRead(x, &xx));
1618     PetscCall(VecRestoreArray(b, &bb));
1619   }
1620 
1621   /* actually zap the local rows */
1622   /*
1623         Zero the required rows. If the "diagonal block" of the matrix
1624      is square and the user wishes to set the diagonal we use separate
1625      code so that MatSetValues() is not called for each diagonal allocating
1626      new memory, thus calling lots of mallocs and slowing things down.
1627 
1628   */
1629   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1630   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1631   PetscCall(MatHasCongruentLayouts(A, &cong));
1632   if ((diag != 0.0) && cong) {
1633     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1634   } else if (diag != 0.0) {
1635     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1636     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1637     for (r = 0; r < len; ++r) {
1638       const PetscInt row = lrows[r] + A->rmap->rstart;
1639       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1640     }
1641     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1642     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1643   } else {
1644     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1645   }
1646   PetscCall(PetscFree(lrows));
1647 
1648   /* only change matrix nonzero state if pattern was allowed to be changed */
1649   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1650     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1651     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1652   }
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1657 {
1658   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1659   PetscMPIInt        n, p = 0;
1660   PetscInt           i, j, k, r, len = 0, row, col, count;
1661   PetscInt          *lrows, *owners = A->rmap->range;
1662   PetscSFNode       *rrows;
1663   PetscSF            sf;
1664   const PetscScalar *xx;
1665   PetscScalar       *bb, *mask;
1666   Vec                xmask, lmask;
1667   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1668   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1669   PetscScalar       *aa;
1670 
1671   PetscFunctionBegin;
1672   PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1673   /* Create SF where leaves are input rows and roots are owned rows */
1674   PetscCall(PetscMalloc1(n, &lrows));
1675   for (r = 0; r < n; ++r) lrows[r] = -1;
1676   PetscCall(PetscMalloc1(N, &rrows));
1677   for (r = 0; r < N; ++r) {
1678     const PetscInt idx = rows[r];
1679     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1680     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1681       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1682     }
1683     rrows[r].rank  = p;
1684     rrows[r].index = rows[r] - owners[p];
1685   }
1686   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1687   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1688   /* Collect flags for rows to be zeroed */
1689   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1690   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1691   PetscCall(PetscSFDestroy(&sf));
1692   /* Compress and put in row numbers */
1693   for (r = 0; r < n; ++r)
1694     if (lrows[r] >= 0) lrows[len++] = r;
1695   /* zero diagonal part of matrix */
1696   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1697   /* handle off-diagonal part of matrix */
1698   PetscCall(MatCreateVecs(A, &xmask, NULL));
1699   PetscCall(VecDuplicate(l->lvec, &lmask));
1700   PetscCall(VecGetArray(xmask, &bb));
1701   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1702   PetscCall(VecRestoreArray(xmask, &bb));
1703   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1704   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1705   PetscCall(VecDestroy(&xmask));
1706   if (x) {
1707     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1708     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1709     PetscCall(VecGetArrayRead(l->lvec, &xx));
1710     PetscCall(VecGetArray(b, &bb));
1711   }
1712   PetscCall(VecGetArray(lmask, &mask));
1713   /* remove zeroed rows of off-diagonal matrix */
1714   for (i = 0; i < len; ++i) {
1715     row   = lrows[i];
1716     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1717     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
1718     for (k = 0; k < count; ++k) {
1719       aa[0] = 0.0;
1720       aa += bs;
1721     }
1722   }
1723   /* loop over all elements of off process part of matrix zeroing removed columns*/
1724   for (i = 0; i < l->B->rmap->N; ++i) {
1725     row = i / bs;
1726     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1727       for (k = 0; k < bs; ++k) {
1728         col = bs * baij->j[j] + k;
1729         if (PetscAbsScalar(mask[col])) {
1730           aa = baij->a + j * bs2 + (i % bs) + bs * k;
1731           if (x) bb[i] -= aa[0] * xx[col];
1732           aa[0] = 0.0;
1733         }
1734       }
1735     }
1736   }
1737   if (x) {
1738     PetscCall(VecRestoreArray(b, &bb));
1739     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1740   }
1741   PetscCall(VecRestoreArray(lmask, &mask));
1742   PetscCall(VecDestroy(&lmask));
1743   PetscCall(PetscFree(lrows));
1744 
1745   /* only change matrix nonzero state if pattern was allowed to be changed */
1746   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1747     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1748     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1754 {
1755   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1756 
1757   PetscFunctionBegin;
1758   PetscCall(MatSetUnfactored(a->A));
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1763 
1764 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1765 {
1766   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1767   Mat          a, b, c, d;
1768   PetscBool    flg;
1769 
1770   PetscFunctionBegin;
1771   a = matA->A;
1772   b = matA->B;
1773   c = matB->A;
1774   d = matB->B;
1775 
1776   PetscCall(MatEqual(a, c, &flg));
1777   if (flg) PetscCall(MatEqual(b, d, &flg));
1778   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1783 {
1784   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1785   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1786 
1787   PetscFunctionBegin;
1788   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1789   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1790     PetscCall(MatCopy_Basic(A, B, str));
1791   } else {
1792     PetscCall(MatCopy(a->A, b->A, str));
1793     PetscCall(MatCopy(a->B, b->B, str));
1794   }
1795   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1800 {
1801   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1802   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1803   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1804 
1805   PetscFunctionBegin;
1806   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1811 {
1812   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1813   PetscBLASInt bnz, one                         = 1;
1814   Mat_SeqBAIJ *x, *y;
1815   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1816 
1817   PetscFunctionBegin;
1818   if (str == SAME_NONZERO_PATTERN) {
1819     PetscScalar alpha = a;
1820     x                 = (Mat_SeqBAIJ *)xx->A->data;
1821     y                 = (Mat_SeqBAIJ *)yy->A->data;
1822     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1823     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1824     x = (Mat_SeqBAIJ *)xx->B->data;
1825     y = (Mat_SeqBAIJ *)yy->B->data;
1826     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1827     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1828     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1829   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1830     PetscCall(MatAXPY_Basic(Y, a, X, str));
1831   } else {
1832     Mat       B;
1833     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1834     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1835     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1836     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1837     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1838     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1839     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1840     PetscCall(MatSetType(B, MATMPIBAIJ));
1841     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1842     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1843     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1844     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1845     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1846     PetscCall(MatHeaderMerge(Y, &B));
1847     PetscCall(PetscFree(nnz_d));
1848     PetscCall(PetscFree(nnz_o));
1849   }
1850   PetscFunctionReturn(PETSC_SUCCESS);
1851 }
1852 
1853 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1854 {
1855   PetscFunctionBegin;
1856   if (PetscDefined(USE_COMPLEX)) {
1857     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1858 
1859     PetscCall(MatConjugate_SeqBAIJ(a->A));
1860     PetscCall(MatConjugate_SeqBAIJ(a->B));
1861   }
1862   PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864 
1865 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1866 {
1867   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1868 
1869   PetscFunctionBegin;
1870   PetscCall(MatRealPart(a->A));
1871   PetscCall(MatRealPart(a->B));
1872   PetscFunctionReturn(PETSC_SUCCESS);
1873 }
1874 
1875 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1876 {
1877   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1878 
1879   PetscFunctionBegin;
1880   PetscCall(MatImaginaryPart(a->A));
1881   PetscCall(MatImaginaryPart(a->B));
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1886 {
1887   IS       iscol_local;
1888   PetscInt csize;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(ISGetLocalSize(iscol, &csize));
1892   if (call == MAT_REUSE_MATRIX) {
1893     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1894     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1895   } else {
1896     PetscCall(ISAllGather(iscol, &iscol_local));
1897   }
1898   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1899   if (call == MAT_INITIAL_MATRIX) {
1900     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1901     PetscCall(ISDestroy(&iscol_local));
1902   }
1903   PetscFunctionReturn(PETSC_SUCCESS);
1904 }
1905 
1906 /*
1907   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1908   in local and then by concatenating the local matrices the end result.
1909   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1910   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1911 */
1912 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1913 {
1914   PetscMPIInt  rank, size;
1915   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1916   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1917   Mat          M, Mreuse;
1918   MatScalar   *vwork, *aa;
1919   MPI_Comm     comm;
1920   IS           isrow_new, iscol_new;
1921   Mat_SeqBAIJ *aij;
1922 
1923   PetscFunctionBegin;
1924   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1925   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1926   PetscCallMPI(MPI_Comm_size(comm, &size));
1927   /* The compression and expansion should be avoided. Doesn't point
1928      out errors, might change the indices, hence buggey */
1929   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1930   if (isrow == iscol) {
1931     iscol_new = isrow_new;
1932     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1933   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1934 
1935   if (call == MAT_REUSE_MATRIX) {
1936     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1937     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1938     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1939   } else {
1940     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1941   }
1942   PetscCall(ISDestroy(&isrow_new));
1943   PetscCall(ISDestroy(&iscol_new));
1944   /*
1945       m - number of local rows
1946       n - number of columns (same on all processors)
1947       rstart - first row in new global matrix generated
1948   */
1949   PetscCall(MatGetBlockSize(mat, &bs));
1950   PetscCall(MatGetSize(Mreuse, &m, &n));
1951   m = m / bs;
1952   n = n / bs;
1953 
1954   if (call == MAT_INITIAL_MATRIX) {
1955     aij = (Mat_SeqBAIJ *)Mreuse->data;
1956     ii  = aij->i;
1957     jj  = aij->j;
1958 
1959     /*
1960         Determine the number of non-zeros in the diagonal and off-diagonal
1961         portions of the matrix in order to do correct preallocation
1962     */
1963 
1964     /* first get start and end of "diagonal" columns */
1965     if (csize == PETSC_DECIDE) {
1966       PetscCall(ISGetSize(isrow, &mglobal));
1967       if (mglobal == n * bs) { /* square matrix */
1968         nlocal = m;
1969       } else {
1970         nlocal = n / size + ((n % size) > rank);
1971       }
1972     } else {
1973       nlocal = csize / bs;
1974     }
1975     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1976     rstart = rend - nlocal;
1977     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1978 
1979     /* next, compute all the lengths */
1980     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1981     for (i = 0; i < m; i++) {
1982       jend = ii[i + 1] - ii[i];
1983       olen = 0;
1984       dlen = 0;
1985       for (j = 0; j < jend; j++) {
1986         if (*jj < rstart || *jj >= rend) olen++;
1987         else dlen++;
1988         jj++;
1989       }
1990       olens[i] = olen;
1991       dlens[i] = dlen;
1992     }
1993     PetscCall(MatCreate(comm, &M));
1994     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1995     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1996     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1997     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1998     PetscCall(PetscFree2(dlens, olens));
1999   } else {
2000     PetscInt ml, nl;
2001 
2002     M = *newmat;
2003     PetscCall(MatGetLocalSize(M, &ml, &nl));
2004     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2005     PetscCall(MatZeroEntries(M));
2006     /*
2007          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2008        rather than the slower MatSetValues().
2009     */
2010     M->was_assembled = PETSC_TRUE;
2011     M->assembled     = PETSC_FALSE;
2012   }
2013   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2014   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2015   aij = (Mat_SeqBAIJ *)Mreuse->data;
2016   ii  = aij->i;
2017   jj  = aij->j;
2018   aa  = aij->a;
2019   for (i = 0; i < m; i++) {
2020     row   = rstart / bs + i;
2021     nz    = ii[i + 1] - ii[i];
2022     cwork = jj;
2023     jj    = PetscSafePointerPlusOffset(jj, nz);
2024     vwork = aa;
2025     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2026     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2027   }
2028 
2029   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2030   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2031   *newmat = M;
2032 
2033   /* save submatrix used in processor for next request */
2034   if (call == MAT_INITIAL_MATRIX) {
2035     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2036     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2037   }
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 
2041 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2042 {
2043   MPI_Comm        comm, pcomm;
2044   PetscInt        clocal_size, nrows;
2045   const PetscInt *rows;
2046   PetscMPIInt     size;
2047   IS              crowp, lcolp;
2048 
2049   PetscFunctionBegin;
2050   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2051   /* make a collective version of 'rowp' */
2052   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2053   if (pcomm == comm) {
2054     crowp = rowp;
2055   } else {
2056     PetscCall(ISGetSize(rowp, &nrows));
2057     PetscCall(ISGetIndices(rowp, &rows));
2058     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2059     PetscCall(ISRestoreIndices(rowp, &rows));
2060   }
2061   PetscCall(ISSetPermutation(crowp));
2062   /* make a local version of 'colp' */
2063   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2064   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2065   if (size == 1) {
2066     lcolp = colp;
2067   } else {
2068     PetscCall(ISAllGather(colp, &lcolp));
2069   }
2070   PetscCall(ISSetPermutation(lcolp));
2071   /* now we just get the submatrix */
2072   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2073   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2074   /* clean up */
2075   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2076   if (size > 1) PetscCall(ISDestroy(&lcolp));
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2081 {
2082   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2083   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2084 
2085   PetscFunctionBegin;
2086   if (nghosts) *nghosts = B->nbs;
2087   if (ghosts) *ghosts = baij->garray;
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2092 {
2093   Mat          B;
2094   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2095   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2096   Mat_SeqAIJ  *b;
2097   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2098   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2099   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2100 
2101   PetscFunctionBegin;
2102   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2103   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2104 
2105   /*   Tell every processor the number of nonzeros per row  */
2106   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2107   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2108   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2109   displs = recvcounts + size;
2110   for (i = 0; i < size; i++) {
2111     PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2112     PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2113   }
2114   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2115   /* Create the sequential matrix of the same type as the local block diagonal  */
2116   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2117   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2118   PetscCall(MatSetType(B, MATSEQAIJ));
2119   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2120   b = (Mat_SeqAIJ *)B->data;
2121 
2122   /*     Copy my part of matrix column indices over  */
2123   sendcount  = ad->nz + bd->nz;
2124   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2125   a_jsendbuf = ad->j;
2126   b_jsendbuf = bd->j;
2127   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2128   cnt        = 0;
2129   for (i = 0; i < n; i++) {
2130     /* put in lower diagonal portion */
2131     m = bd->i[i + 1] - bd->i[i];
2132     while (m > 0) {
2133       /* is it above diagonal (in bd (compressed) numbering) */
2134       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2135       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2136       m--;
2137     }
2138 
2139     /* put in diagonal portion */
2140     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2141 
2142     /* put in upper diagonal portion */
2143     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2144   }
2145   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2146 
2147   /*  Gather all column indices to all processors  */
2148   for (i = 0; i < size; i++) {
2149     recvcounts[i] = 0;
2150     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2151   }
2152   displs[0] = 0;
2153   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2154   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2155   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2156   /* set the b->ilen (length of each row) values */
2157   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2158   /* set the b->i indices */
2159   b->i[0] = 0;
2160   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2161   PetscCall(PetscFree(lens));
2162   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2163   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2164   PetscCall(PetscFree(recvcounts));
2165 
2166   PetscCall(MatPropagateSymmetryOptions(A, B));
2167   *newmat = B;
2168   PetscFunctionReturn(PETSC_SUCCESS);
2169 }
2170 
2171 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2172 {
2173   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2174   Vec          bb1 = NULL;
2175 
2176   PetscFunctionBegin;
2177   if (flag == SOR_APPLY_UPPER) {
2178     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2179     PetscFunctionReturn(PETSC_SUCCESS);
2180   }
2181 
2182   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2183 
2184   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2185     if (flag & SOR_ZERO_INITIAL_GUESS) {
2186       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2187       its--;
2188     }
2189 
2190     while (its--) {
2191       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2192       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2193 
2194       /* update rhs: bb1 = bb - B*x */
2195       PetscCall(VecScale(mat->lvec, -1.0));
2196       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2197 
2198       /* local sweep */
2199       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2200     }
2201   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2202     if (flag & SOR_ZERO_INITIAL_GUESS) {
2203       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2204       its--;
2205     }
2206     while (its--) {
2207       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2208       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2209 
2210       /* update rhs: bb1 = bb - B*x */
2211       PetscCall(VecScale(mat->lvec, -1.0));
2212       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2213 
2214       /* local sweep */
2215       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2216     }
2217   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2218     if (flag & SOR_ZERO_INITIAL_GUESS) {
2219       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2220       its--;
2221     }
2222     while (its--) {
2223       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2224       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2225 
2226       /* update rhs: bb1 = bb - B*x */
2227       PetscCall(VecScale(mat->lvec, -1.0));
2228       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2229 
2230       /* local sweep */
2231       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2232     }
2233   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2234 
2235   PetscCall(VecDestroy(&bb1));
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238 
2239 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2240 {
2241   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2242   PetscInt     m, N, i, *garray = aij->garray;
2243   PetscInt     ib, jb, bs = A->rmap->bs;
2244   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2245   MatScalar   *a_val = a_aij->a;
2246   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2247   MatScalar   *b_val = b_aij->a;
2248   PetscReal   *work;
2249 
2250   PetscFunctionBegin;
2251   PetscCall(MatGetSize(A, &m, &N));
2252   PetscCall(PetscCalloc1(N, &work));
2253   if (type == NORM_2) {
2254     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2255       for (jb = 0; jb < bs; jb++) {
2256         for (ib = 0; ib < bs; ib++) {
2257           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2258           a_val++;
2259         }
2260       }
2261     }
2262     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2263       for (jb = 0; jb < bs; jb++) {
2264         for (ib = 0; ib < bs; ib++) {
2265           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2266           b_val++;
2267         }
2268       }
2269     }
2270   } else if (type == NORM_1) {
2271     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2275           a_val++;
2276         }
2277       }
2278     }
2279     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2280       for (jb = 0; jb < bs; jb++) {
2281         for (ib = 0; ib < bs; ib++) {
2282           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2283           b_val++;
2284         }
2285       }
2286     }
2287   } else if (type == NORM_INFINITY) {
2288     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
2291           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2292           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2293           a_val++;
2294         }
2295       }
2296     }
2297     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
2300           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2301           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2302           b_val++;
2303         }
2304       }
2305     }
2306   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2307     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2308       for (jb = 0; jb < bs; jb++) {
2309         for (ib = 0; ib < bs; ib++) {
2310           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2311           a_val++;
2312         }
2313       }
2314     }
2315     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2316       for (jb = 0; jb < bs; jb++) {
2317         for (ib = 0; ib < bs; ib++) {
2318           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2319           b_val++;
2320         }
2321       }
2322     }
2323   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2324     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2328           a_val++;
2329         }
2330       }
2331     }
2332     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2333       for (jb = 0; jb < bs; jb++) {
2334         for (ib = 0; ib < bs; ib++) {
2335           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2336           b_val++;
2337         }
2338       }
2339     }
2340   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2341   if (type == NORM_INFINITY) {
2342     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2343   } else {
2344     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2345   }
2346   PetscCall(PetscFree(work));
2347   if (type == NORM_2) {
2348     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2349   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2350     for (i = 0; i < N; i++) reductions[i] /= m;
2351   }
2352   PetscFunctionReturn(PETSC_SUCCESS);
2353 }
2354 
2355 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2356 {
2357   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2358 
2359   PetscFunctionBegin;
2360   PetscCall(MatInvertBlockDiagonal(a->A, values));
2361   A->factorerrortype             = a->A->factorerrortype;
2362   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2363   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2364   PetscFunctionReturn(PETSC_SUCCESS);
2365 }
2366 
2367 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2368 {
2369   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2370   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2371 
2372   PetscFunctionBegin;
2373   if (!Y->preallocated) {
2374     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2375   } else if (!aij->nz) {
2376     PetscInt nonew = aij->nonew;
2377     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2378     aij->nonew = nonew;
2379   }
2380   PetscCall(MatShift_Basic(Y, a));
2381   PetscFunctionReturn(PETSC_SUCCESS);
2382 }
2383 
2384 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2385 {
2386   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2387 
2388   PetscFunctionBegin;
2389   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2390   PetscCall(MatMissingDiagonal(a->A, missing, d));
2391   if (d) {
2392     PetscInt rstart;
2393     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2394     *d += rstart / A->rmap->bs;
2395   }
2396   PetscFunctionReturn(PETSC_SUCCESS);
2397 }
2398 
2399 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2400 {
2401   PetscFunctionBegin;
2402   *a = ((Mat_MPIBAIJ *)A->data)->A;
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2407 {
2408   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2409 
2410   PetscFunctionBegin;
2411   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2412   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2413   PetscFunctionReturn(PETSC_SUCCESS);
2414 }
2415 
2416 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2417                                        MatGetRow_MPIBAIJ,
2418                                        MatRestoreRow_MPIBAIJ,
2419                                        MatMult_MPIBAIJ,
2420                                        /* 4*/ MatMultAdd_MPIBAIJ,
2421                                        MatMultTranspose_MPIBAIJ,
2422                                        MatMultTransposeAdd_MPIBAIJ,
2423                                        NULL,
2424                                        NULL,
2425                                        NULL,
2426                                        /*10*/ NULL,
2427                                        NULL,
2428                                        NULL,
2429                                        MatSOR_MPIBAIJ,
2430                                        MatTranspose_MPIBAIJ,
2431                                        /*15*/ MatGetInfo_MPIBAIJ,
2432                                        MatEqual_MPIBAIJ,
2433                                        MatGetDiagonal_MPIBAIJ,
2434                                        MatDiagonalScale_MPIBAIJ,
2435                                        MatNorm_MPIBAIJ,
2436                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2437                                        MatAssemblyEnd_MPIBAIJ,
2438                                        MatSetOption_MPIBAIJ,
2439                                        MatZeroEntries_MPIBAIJ,
2440                                        /*24*/ MatZeroRows_MPIBAIJ,
2441                                        NULL,
2442                                        NULL,
2443                                        NULL,
2444                                        NULL,
2445                                        /*29*/ MatSetUp_MPI_Hash,
2446                                        NULL,
2447                                        NULL,
2448                                        MatGetDiagonalBlock_MPIBAIJ,
2449                                        NULL,
2450                                        /*34*/ MatDuplicate_MPIBAIJ,
2451                                        NULL,
2452                                        NULL,
2453                                        NULL,
2454                                        NULL,
2455                                        /*39*/ MatAXPY_MPIBAIJ,
2456                                        MatCreateSubMatrices_MPIBAIJ,
2457                                        MatIncreaseOverlap_MPIBAIJ,
2458                                        MatGetValues_MPIBAIJ,
2459                                        MatCopy_MPIBAIJ,
2460                                        /*44*/ NULL,
2461                                        MatScale_MPIBAIJ,
2462                                        MatShift_MPIBAIJ,
2463                                        NULL,
2464                                        MatZeroRowsColumns_MPIBAIJ,
2465                                        /*49*/ NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        NULL,
2469                                        NULL,
2470                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2471                                        NULL,
2472                                        MatSetUnfactored_MPIBAIJ,
2473                                        MatPermute_MPIBAIJ,
2474                                        MatSetValuesBlocked_MPIBAIJ,
2475                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2476                                        MatDestroy_MPIBAIJ,
2477                                        MatView_MPIBAIJ,
2478                                        NULL,
2479                                        NULL,
2480                                        /*64*/ NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        NULL,
2484                                        MatGetRowMaxAbs_MPIBAIJ,
2485                                        /*69*/ NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        MatFDColoringApply_BAIJ,
2489                                        NULL,
2490                                        /*74*/ NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        NULL,
2494                                        MatLoad_MPIBAIJ,
2495                                        /*79*/ NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                        NULL,
2500                                        /*84*/ NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        NULL,
2505                                        /*89*/ NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        MatConjugate_MPIBAIJ,
2510                                        /*94*/ NULL,
2511                                        NULL,
2512                                        MatRealPart_MPIBAIJ,
2513                                        MatImaginaryPart_MPIBAIJ,
2514                                        NULL,
2515                                        /*99*/ NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        /*104*/ MatMissingDiagonal_MPIBAIJ,
2521                                        MatGetSeqNonzeroStructure_MPIBAIJ,
2522                                        NULL,
2523                                        MatGetGhosts_MPIBAIJ,
2524                                        NULL,
2525                                        /*109*/ NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        /*114*/ MatGetMultiProcBlock_MPIBAIJ,
2531                                        NULL,
2532                                        MatGetColumnReductions_MPIBAIJ,
2533                                        MatInvertBlockDiagonal_MPIBAIJ,
2534                                        NULL,
2535                                        /*119*/ NULL,
2536                                        NULL,
2537                                        NULL,
2538                                        NULL,
2539                                        NULL,
2540                                        /*124*/ NULL,
2541                                        NULL,
2542                                        NULL,
2543                                        MatSetBlockSizes_Default,
2544                                        NULL,
2545                                        /*129*/ MatFDColoringSetUp_MPIXAIJ,
2546                                        NULL,
2547                                        MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2548                                        NULL,
2549                                        NULL,
2550                                        /*134*/ NULL,
2551                                        NULL,
2552                                        NULL,
2553                                        MatEliminateZeros_MPIBAIJ,
2554                                        MatGetRowSumAbs_MPIBAIJ,
2555                                        /*139*/ NULL,
2556                                        NULL,
2557                                        NULL,
2558                                        MatCopyHashToXAIJ_MPI_Hash,
2559                                        NULL};
2560 
2561 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2562 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2563 
2564 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2565 {
2566   PetscInt        m, rstart, cstart, cend;
2567   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2568   const PetscInt *JJ          = NULL;
2569   PetscScalar    *values      = NULL;
2570   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2571   PetscBool       nooffprocentries;
2572 
2573   PetscFunctionBegin;
2574   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2575   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2576   PetscCall(PetscLayoutSetUp(B->rmap));
2577   PetscCall(PetscLayoutSetUp(B->cmap));
2578   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2579   m      = B->rmap->n / bs;
2580   rstart = B->rmap->rstart / bs;
2581   cstart = B->cmap->rstart / bs;
2582   cend   = B->cmap->rend / bs;
2583 
2584   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2585   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2586   for (i = 0; i < m; i++) {
2587     nz = ii[i + 1] - ii[i];
2588     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2589     nz_max = PetscMax(nz_max, nz);
2590     dlen   = 0;
2591     olen   = 0;
2592     JJ     = jj + ii[i];
2593     for (j = 0; j < nz; j++) {
2594       if (*JJ < cstart || *JJ >= cend) olen++;
2595       else dlen++;
2596       JJ++;
2597     }
2598     d_nnz[i] = dlen;
2599     o_nnz[i] = olen;
2600   }
2601   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2602   PetscCall(PetscFree2(d_nnz, o_nnz));
2603 
2604   values = (PetscScalar *)V;
2605   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2606   for (i = 0; i < m; i++) {
2607     PetscInt        row   = i + rstart;
2608     PetscInt        ncols = ii[i + 1] - ii[i];
2609     const PetscInt *icols = jj + ii[i];
2610     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2611       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2612       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2613     } else { /* block ordering does not match so we can only insert one block at a time. */
2614       PetscInt j;
2615       for (j = 0; j < ncols; j++) {
2616         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2617         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2618       }
2619     }
2620   }
2621 
2622   if (!V) PetscCall(PetscFree(values));
2623   nooffprocentries    = B->nooffprocentries;
2624   B->nooffprocentries = PETSC_TRUE;
2625   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2626   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2627   B->nooffprocentries = nooffprocentries;
2628 
2629   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2630   PetscFunctionReturn(PETSC_SUCCESS);
2631 }
2632 
2633 /*@C
2634   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2635 
2636   Collective
2637 
2638   Input Parameters:
2639 + B  - the matrix
2640 . bs - the block size
2641 . i  - the indices into `j` for the start of each local row (starts with zero)
2642 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2643 - v  - optional values in the matrix, use `NULL` if not provided
2644 
2645   Level: advanced
2646 
2647   Notes:
2648   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2649   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2650   called this routine.
2651 
2652   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2653   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2654   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2655   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2656   block column and the second index is over columns within a block.
2657 
2658   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2659 
2660 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2661 @*/
2662 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2663 {
2664   PetscFunctionBegin;
2665   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2666   PetscValidType(B, 1);
2667   PetscValidLogicalCollectiveInt(B, bs, 2);
2668   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2673 {
2674   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2675   PetscInt     i;
2676   PetscMPIInt  size;
2677 
2678   PetscFunctionBegin;
2679   if (B->hash_active) {
2680     B->ops[0]      = b->cops;
2681     B->hash_active = PETSC_FALSE;
2682   }
2683   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2684   PetscCall(MatSetBlockSize(B, bs));
2685   PetscCall(PetscLayoutSetUp(B->rmap));
2686   PetscCall(PetscLayoutSetUp(B->cmap));
2687   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2688 
2689   if (d_nnz) {
2690     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2691   }
2692   if (o_nnz) {
2693     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2694   }
2695 
2696   b->bs2 = bs * bs;
2697   b->mbs = B->rmap->n / bs;
2698   b->nbs = B->cmap->n / bs;
2699   b->Mbs = B->rmap->N / bs;
2700   b->Nbs = B->cmap->N / bs;
2701 
2702   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2703   b->rstartbs = B->rmap->rstart / bs;
2704   b->rendbs   = B->rmap->rend / bs;
2705   b->cstartbs = B->cmap->rstart / bs;
2706   b->cendbs   = B->cmap->rend / bs;
2707 
2708 #if defined(PETSC_USE_CTABLE)
2709   PetscCall(PetscHMapIDestroy(&b->colmap));
2710 #else
2711   PetscCall(PetscFree(b->colmap));
2712 #endif
2713   PetscCall(PetscFree(b->garray));
2714   PetscCall(VecDestroy(&b->lvec));
2715   PetscCall(VecScatterDestroy(&b->Mvctx));
2716 
2717   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2718 
2719   MatSeqXAIJGetOptions_Private(b->B);
2720   PetscCall(MatDestroy(&b->B));
2721   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2722   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2723   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2724   MatSeqXAIJRestoreOptions_Private(b->B);
2725 
2726   MatSeqXAIJGetOptions_Private(b->A);
2727   PetscCall(MatDestroy(&b->A));
2728   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2729   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2730   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2731   MatSeqXAIJRestoreOptions_Private(b->A);
2732 
2733   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2734   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2735   B->preallocated  = PETSC_TRUE;
2736   B->was_assembled = PETSC_FALSE;
2737   B->assembled     = PETSC_FALSE;
2738   PetscFunctionReturn(PETSC_SUCCESS);
2739 }
2740 
2741 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2742 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2743 
2744 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2745 {
2746   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2747   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2748   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2749   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2750 
2751   PetscFunctionBegin;
2752   PetscCall(PetscMalloc1(M + 1, &ii));
2753   ii[0] = 0;
2754   for (i = 0; i < M; i++) {
2755     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2756     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2757     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2758     /* remove one from count of matrix has diagonal */
2759     for (j = id[i]; j < id[i + 1]; j++) {
2760       if (jd[j] == i) {
2761         ii[i + 1]--;
2762         break;
2763       }
2764     }
2765   }
2766   PetscCall(PetscMalloc1(ii[M], &jj));
2767   cnt = 0;
2768   for (i = 0; i < M; i++) {
2769     for (j = io[i]; j < io[i + 1]; j++) {
2770       if (garray[jo[j]] > rstart) break;
2771       jj[cnt++] = garray[jo[j]];
2772     }
2773     for (k = id[i]; k < id[i + 1]; k++) {
2774       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2775     }
2776     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2777   }
2778   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2779   PetscFunctionReturn(PETSC_SUCCESS);
2780 }
2781 
2782 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2783 
2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2785 
2786 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2787 {
2788   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2789   Mat_MPIAIJ  *b;
2790   Mat          B;
2791 
2792   PetscFunctionBegin;
2793   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2794 
2795   if (reuse == MAT_REUSE_MATRIX) {
2796     B = *newmat;
2797   } else {
2798     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2799     PetscCall(MatSetType(B, MATMPIAIJ));
2800     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2801     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2802     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2803     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2804   }
2805   b = (Mat_MPIAIJ *)B->data;
2806 
2807   if (reuse == MAT_REUSE_MATRIX) {
2808     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2809     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2810   } else {
2811     PetscInt   *garray = a->garray;
2812     Mat_SeqAIJ *bB;
2813     PetscInt    bs, nnz;
2814     PetscCall(MatDestroy(&b->A));
2815     PetscCall(MatDestroy(&b->B));
2816     /* just clear out the data structure */
2817     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2818     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2819     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2820 
2821     /* Global numbering for b->B columns */
2822     bB  = (Mat_SeqAIJ *)b->B->data;
2823     bs  = A->rmap->bs;
2824     nnz = bB->i[A->rmap->n];
2825     for (PetscInt k = 0; k < nnz; k++) {
2826       PetscInt bj = bB->j[k] / bs;
2827       PetscInt br = bB->j[k] % bs;
2828       bB->j[k]    = garray[bj] * bs + br;
2829     }
2830   }
2831   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2832   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2833 
2834   if (reuse == MAT_INPLACE_MATRIX) {
2835     PetscCall(MatHeaderReplace(A, &B));
2836   } else {
2837     *newmat = B;
2838   }
2839   PetscFunctionReturn(PETSC_SUCCESS);
2840 }
2841 
2842 /*MC
2843    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2844 
2845    Options Database Keys:
2846 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2847 . -mat_block_size <bs> - set the blocksize used to store the matrix
2848 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2849 - -mat_use_hash_table <fact> - set hash table factor
2850 
2851    Level: beginner
2852 
2853    Note:
2854     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2855     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2856 
2857 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2858 M*/
2859 
2860 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2861 
2862 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2863 {
2864   Mat_MPIBAIJ *b;
2865   PetscBool    flg = PETSC_FALSE;
2866 
2867   PetscFunctionBegin;
2868   PetscCall(PetscNew(&b));
2869   B->data      = (void *)b;
2870   B->ops[0]    = MatOps_Values;
2871   B->assembled = PETSC_FALSE;
2872 
2873   B->insertmode = NOT_SET_VALUES;
2874   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2875   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2876 
2877   /* build local table of row and column ownerships */
2878   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2879 
2880   /* build cache for off array entries formed */
2881   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2882 
2883   b->donotstash  = PETSC_FALSE;
2884   b->colmap      = NULL;
2885   b->garray      = NULL;
2886   b->roworiented = PETSC_TRUE;
2887 
2888   /* stuff used in block assembly */
2889   b->barray = NULL;
2890 
2891   /* stuff used for matrix vector multiply */
2892   b->lvec  = NULL;
2893   b->Mvctx = NULL;
2894 
2895   /* stuff for MatGetRow() */
2896   b->rowindices   = NULL;
2897   b->rowvalues    = NULL;
2898   b->getrowactive = PETSC_FALSE;
2899 
2900   /* hash table stuff */
2901   b->ht           = NULL;
2902   b->hd           = NULL;
2903   b->ht_size      = 0;
2904   b->ht_flag      = PETSC_FALSE;
2905   b->ht_fact      = 0;
2906   b->ht_total_ct  = 0;
2907   b->ht_insert_ct = 0;
2908 
2909   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2910   b->ijonly = PETSC_FALSE;
2911 
2912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2915 #if defined(PETSC_HAVE_HYPRE)
2916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2917 #endif
2918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2925   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2926 
2927   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2928   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2929   if (flg) {
2930     PetscReal fact = 1.39;
2931     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2932     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2933     if (fact <= 1.0) fact = 1.39;
2934     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2935     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2936   }
2937   PetscOptionsEnd();
2938   PetscFunctionReturn(PETSC_SUCCESS);
2939 }
2940 
2941 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2942 /*MC
2943    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2944 
2945    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2946    and `MATMPIBAIJ` otherwise.
2947 
2948    Options Database Keys:
2949 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2950 
2951   Level: beginner
2952 
2953 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2954 M*/
2955 
2956 /*@
2957   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2958   (block compressed row).
2959 
2960   Collective
2961 
2962   Input Parameters:
2963 + B     - the matrix
2964 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2965           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2966 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2967            submatrix  (same for all local rows)
2968 . d_nnz - array containing the number of block nonzeros in the various block rows
2969            of the in diagonal portion of the local (possibly different for each block
2970            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2971            set it even if it is zero.
2972 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2973            submatrix (same for all local rows).
2974 - o_nnz - array containing the number of nonzeros in the various block rows of the
2975            off-diagonal portion of the local submatrix (possibly different for
2976            each block row) or `NULL`.
2977 
2978    If the *_nnz parameter is given then the *_nz parameter is ignored
2979 
2980   Options Database Keys:
2981 + -mat_block_size            - size of the blocks to use
2982 - -mat_use_hash_table <fact> - set hash table factor
2983 
2984   Level: intermediate
2985 
2986   Notes:
2987   For good matrix assembly performance
2988   the user should preallocate the matrix storage by setting the parameters
2989   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2990   performance can be increased by more than a factor of 50.
2991 
2992   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2993   than it must be used on all processors that share the object for that argument.
2994 
2995   Storage Information:
2996   For a square global matrix we define each processor's diagonal portion
2997   to be its local rows and the corresponding columns (a square submatrix);
2998   each processor's off-diagonal portion encompasses the remainder of the
2999   local matrix (a rectangular submatrix).
3000 
3001   The user can specify preallocated storage for the diagonal part of
3002   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3003   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3004   memory allocation.  Likewise, specify preallocated storage for the
3005   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3006 
3007   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3008   the figure below we depict these three local rows and all columns (0-11).
3009 
3010 .vb
3011            0 1 2 3 4 5 6 7 8 9 10 11
3012           --------------------------
3013    row 3  |o o o d d d o o o o  o  o
3014    row 4  |o o o d d d o o o o  o  o
3015    row 5  |o o o d d d o o o o  o  o
3016           --------------------------
3017 .ve
3018 
3019   Thus, any entries in the d locations are stored in the d (diagonal)
3020   submatrix, and any entries in the o locations are stored in the
3021   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3022   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3023 
3024   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3025   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3026   In general, for PDE problems in which most nonzeros are near the diagonal,
3027   one expects `d_nz` >> `o_nz`.
3028 
3029   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3030   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3031   You can also run with the option `-info` and look for messages with the string
3032   malloc in them to see if additional memory allocation was needed.
3033 
3034 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3035 @*/
3036 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3037 {
3038   PetscFunctionBegin;
3039   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3040   PetscValidType(B, 1);
3041   PetscValidLogicalCollectiveInt(B, bs, 2);
3042   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
3045 
3046 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3047 /*@
3048   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3049   (block compressed row).
3050 
3051   Collective
3052 
3053   Input Parameters:
3054 + comm  - MPI communicator
3055 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3056           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3057 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3058           This value should be the same as the local size used in creating the
3059           y vector for the matrix-vector product y = Ax.
3060 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3061           This value should be the same as the local size used in creating the
3062           x vector for the matrix-vector product y = Ax.
3063 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3064 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3065 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3066           submatrix  (same for all local rows)
3067 . d_nnz - array containing the number of nonzero blocks in the various block rows
3068           of the in diagonal portion of the local (possibly different for each block
3069           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3070           and set it even if it is zero.
3071 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3072           submatrix (same for all local rows).
3073 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3074           off-diagonal portion of the local submatrix (possibly different for
3075           each block row) or NULL.
3076 
3077   Output Parameter:
3078 . A - the matrix
3079 
3080   Options Database Keys:
3081 + -mat_block_size            - size of the blocks to use
3082 - -mat_use_hash_table <fact> - set hash table factor
3083 
3084   Level: intermediate
3085 
3086   Notes:
3087   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3088   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3089   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3090 
3091   For good matrix assembly performance
3092   the user should preallocate the matrix storage by setting the parameters
3093   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3094   performance can be increased by more than a factor of 50.
3095 
3096   If the *_nnz parameter is given then the *_nz parameter is ignored
3097 
3098   A nonzero block is any block that as 1 or more nonzeros in it
3099 
3100   The user MUST specify either the local or global matrix dimensions
3101   (possibly both).
3102 
3103   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3104   than it must be used on all processors that share the object for that argument.
3105 
3106   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3107   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3108 
3109   Storage Information:
3110   For a square global matrix we define each processor's diagonal portion
3111   to be its local rows and the corresponding columns (a square submatrix);
3112   each processor's off-diagonal portion encompasses the remainder of the
3113   local matrix (a rectangular submatrix).
3114 
3115   The user can specify preallocated storage for the diagonal part of
3116   the local submatrix with either d_nz or d_nnz (not both).  Set
3117   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3118   memory allocation.  Likewise, specify preallocated storage for the
3119   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3120 
3121   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3122   the figure below we depict these three local rows and all columns (0-11).
3123 
3124 .vb
3125            0 1 2 3 4 5 6 7 8 9 10 11
3126           --------------------------
3127    row 3  |o o o d d d o o o o  o  o
3128    row 4  |o o o d d d o o o o  o  o
3129    row 5  |o o o d d d o o o o  o  o
3130           --------------------------
3131 .ve
3132 
3133   Thus, any entries in the d locations are stored in the d (diagonal)
3134   submatrix, and any entries in the o locations are stored in the
3135   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3136   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3137 
3138   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3139   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3140   In general, for PDE problems in which most nonzeros are near the diagonal,
3141   one expects `d_nz` >> `o_nz`.
3142 
3143 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3144           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3145 @*/
3146 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3147 {
3148   PetscMPIInt size;
3149 
3150   PetscFunctionBegin;
3151   PetscCall(MatCreate(comm, A));
3152   PetscCall(MatSetSizes(*A, m, n, M, N));
3153   PetscCallMPI(MPI_Comm_size(comm, &size));
3154   if (size > 1) {
3155     PetscCall(MatSetType(*A, MATMPIBAIJ));
3156     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3157   } else {
3158     PetscCall(MatSetType(*A, MATSEQBAIJ));
3159     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3160   }
3161   PetscFunctionReturn(PETSC_SUCCESS);
3162 }
3163 
3164 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3165 {
3166   Mat          mat;
3167   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3168   PetscInt     len = 0;
3169 
3170   PetscFunctionBegin;
3171   *newmat = NULL;
3172   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3173   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3174   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3175 
3176   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3177   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3178   if (matin->hash_active) {
3179     PetscCall(MatSetUp(mat));
3180   } else {
3181     mat->factortype   = matin->factortype;
3182     mat->preallocated = PETSC_TRUE;
3183     mat->assembled    = PETSC_TRUE;
3184     mat->insertmode   = NOT_SET_VALUES;
3185 
3186     a             = (Mat_MPIBAIJ *)mat->data;
3187     mat->rmap->bs = matin->rmap->bs;
3188     a->bs2        = oldmat->bs2;
3189     a->mbs        = oldmat->mbs;
3190     a->nbs        = oldmat->nbs;
3191     a->Mbs        = oldmat->Mbs;
3192     a->Nbs        = oldmat->Nbs;
3193 
3194     a->size         = oldmat->size;
3195     a->rank         = oldmat->rank;
3196     a->donotstash   = oldmat->donotstash;
3197     a->roworiented  = oldmat->roworiented;
3198     a->rowindices   = NULL;
3199     a->rowvalues    = NULL;
3200     a->getrowactive = PETSC_FALSE;
3201     a->barray       = NULL;
3202     a->rstartbs     = oldmat->rstartbs;
3203     a->rendbs       = oldmat->rendbs;
3204     a->cstartbs     = oldmat->cstartbs;
3205     a->cendbs       = oldmat->cendbs;
3206 
3207     /* hash table stuff */
3208     a->ht           = NULL;
3209     a->hd           = NULL;
3210     a->ht_size      = 0;
3211     a->ht_flag      = oldmat->ht_flag;
3212     a->ht_fact      = oldmat->ht_fact;
3213     a->ht_total_ct  = 0;
3214     a->ht_insert_ct = 0;
3215 
3216     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3217     if (oldmat->colmap) {
3218 #if defined(PETSC_USE_CTABLE)
3219       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3220 #else
3221       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3222       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3223 #endif
3224     } else a->colmap = NULL;
3225 
3226     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3227       PetscCall(PetscMalloc1(len, &a->garray));
3228       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3229     } else a->garray = NULL;
3230 
3231     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3232     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3233     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3234 
3235     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3236     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3237   }
3238   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3239   *newmat = mat;
3240   PetscFunctionReturn(PETSC_SUCCESS);
3241 }
3242 
3243 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3244 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3245 {
3246   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3247   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3248   PetscScalar *matvals;
3249 
3250   PetscFunctionBegin;
3251   PetscCall(PetscViewerSetUp(viewer));
3252 
3253   /* read in matrix header */
3254   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3255   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3256   M  = header[1];
3257   N  = header[2];
3258   nz = header[3];
3259   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3260   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3261   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3262 
3263   /* set block sizes from the viewer's .info file */
3264   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3265   /* set local sizes if not set already */
3266   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3267   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3268   /* set global sizes if not set already */
3269   if (mat->rmap->N < 0) mat->rmap->N = M;
3270   if (mat->cmap->N < 0) mat->cmap->N = N;
3271   PetscCall(PetscLayoutSetUp(mat->rmap));
3272   PetscCall(PetscLayoutSetUp(mat->cmap));
3273 
3274   /* check if the matrix sizes are correct */
3275   PetscCall(MatGetSize(mat, &rows, &cols));
3276   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3277   PetscCall(MatGetBlockSize(mat, &bs));
3278   PetscCall(MatGetLocalSize(mat, &m, &n));
3279   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3280   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3281   mbs = m / bs;
3282   nbs = n / bs;
3283 
3284   /* read in row lengths and build row indices */
3285   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3286   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3287   rowidxs[0] = 0;
3288   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3289   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3290   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3291 
3292   /* read in column indices and matrix values */
3293   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3294   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3295   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3296 
3297   {                /* preallocate matrix storage */
3298     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3299     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3300     PetscBool  sbaij, done;
3301     PetscInt  *d_nnz, *o_nnz;
3302 
3303     PetscCall(PetscBTCreate(nbs, &bt));
3304     PetscCall(PetscHSetICreate(&ht));
3305     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3306     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3307     for (i = 0; i < mbs; i++) {
3308       PetscCall(PetscBTMemzero(nbs, bt));
3309       PetscCall(PetscHSetIClear(ht));
3310       for (k = 0; k < bs; k++) {
3311         PetscInt row = bs * i + k;
3312         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3313           PetscInt col = colidxs[j];
3314           if (!sbaij || col >= row) {
3315             if (col >= cs && col < ce) {
3316               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3317             } else {
3318               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3319               if (done) o_nnz[i]++;
3320             }
3321           }
3322         }
3323       }
3324     }
3325     PetscCall(PetscBTDestroy(&bt));
3326     PetscCall(PetscHSetIDestroy(&ht));
3327     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3328     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3329     PetscCall(PetscFree2(d_nnz, o_nnz));
3330   }
3331 
3332   /* store matrix values */
3333   for (i = 0; i < m; i++) {
3334     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3335     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3336   }
3337 
3338   PetscCall(PetscFree(rowidxs));
3339   PetscCall(PetscFree2(colidxs, matvals));
3340   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3341   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3342   PetscFunctionReturn(PETSC_SUCCESS);
3343 }
3344 
3345 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3346 {
3347   PetscBool isbinary;
3348 
3349   PetscFunctionBegin;
3350   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3351   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3352   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3353   PetscFunctionReturn(PETSC_SUCCESS);
3354 }
3355 
3356 /*@
3357   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3358 
3359   Input Parameters:
3360 + mat  - the matrix
3361 - fact - factor
3362 
3363   Options Database Key:
3364 . -mat_use_hash_table <fact> - provide the factor
3365 
3366   Level: advanced
3367 
3368 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3369 @*/
3370 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3371 {
3372   PetscFunctionBegin;
3373   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3374   PetscFunctionReturn(PETSC_SUCCESS);
3375 }
3376 
3377 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3378 {
3379   Mat_MPIBAIJ *baij;
3380 
3381   PetscFunctionBegin;
3382   baij          = (Mat_MPIBAIJ *)mat->data;
3383   baij->ht_fact = fact;
3384   PetscFunctionReturn(PETSC_SUCCESS);
3385 }
3386 
3387 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3388 {
3389   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3390   PetscBool    flg;
3391 
3392   PetscFunctionBegin;
3393   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3394   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3395   if (Ad) *Ad = a->A;
3396   if (Ao) *Ao = a->B;
3397   if (colmap) *colmap = a->garray;
3398   PetscFunctionReturn(PETSC_SUCCESS);
3399 }
3400 
3401 /*
3402     Special version for direct calls from Fortran (to eliminate two function call overheads
3403 */
3404 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3405   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3406 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3407   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3408 #endif
3409 
3410 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3411 /*@C
3412   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3413 
3414   Collective
3415 
3416   Input Parameters:
3417 + matin  - the matrix
3418 . min    - number of input rows
3419 . im     - input rows
3420 . nin    - number of input columns
3421 . in     - input columns
3422 . v      - numerical values input
3423 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3424 
3425   Level: advanced
3426 
3427   Developer Notes:
3428   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3429 
3430 .seealso: `Mat`, `MatSetValuesBlocked()`
3431 @*/
3432 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3433 {
3434   /* convert input arguments to C version */
3435   Mat        mat = *matin;
3436   PetscInt   m = *min, n = *nin;
3437   InsertMode addv = *addvin;
3438 
3439   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3440   const MatScalar *value;
3441   MatScalar       *barray      = baij->barray;
3442   PetscBool        roworiented = baij->roworiented;
3443   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3444   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3445   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3446 
3447   PetscFunctionBegin;
3448   /* tasks normally handled by MatSetValuesBlocked() */
3449   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3450   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3451   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3452   if (mat->assembled) {
3453     mat->was_assembled = PETSC_TRUE;
3454     mat->assembled     = PETSC_FALSE;
3455   }
3456   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3457 
3458   if (!barray) {
3459     PetscCall(PetscMalloc1(bs2, &barray));
3460     baij->barray = barray;
3461   }
3462 
3463   if (roworiented) stepval = (n - 1) * bs;
3464   else stepval = (m - 1) * bs;
3465 
3466   for (i = 0; i < m; i++) {
3467     if (im[i] < 0) continue;
3468     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3469     if (im[i] >= rstart && im[i] < rend) {
3470       row = im[i] - rstart;
3471       for (j = 0; j < n; j++) {
3472         /* If NumCol = 1 then a copy is not required */
3473         if ((roworiented) && (n == 1)) {
3474           barray = (MatScalar *)v + i * bs2;
3475         } else if ((!roworiented) && (m == 1)) {
3476           barray = (MatScalar *)v + j * bs2;
3477         } else { /* Here a copy is required */
3478           if (roworiented) {
3479             value = v + i * (stepval + bs) * bs + j * bs;
3480           } else {
3481             value = v + j * (stepval + bs) * bs + i * bs;
3482           }
3483           for (ii = 0; ii < bs; ii++, value += stepval) {
3484             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3485           }
3486           barray -= bs2;
3487         }
3488 
3489         if (in[j] >= cstart && in[j] < cend) {
3490           col = in[j] - cstart;
3491           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3492         } else if (in[j] < 0) {
3493           continue;
3494         } else {
3495           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3496           if (mat->was_assembled) {
3497             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3498 
3499 #if defined(PETSC_USE_DEBUG)
3500   #if defined(PETSC_USE_CTABLE)
3501             {
3502               PetscInt data;
3503               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3504               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3505             }
3506   #else
3507             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3508   #endif
3509 #endif
3510 #if defined(PETSC_USE_CTABLE)
3511             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3512             col = (col - 1) / bs;
3513 #else
3514             col = (baij->colmap[in[j]] - 1) / bs;
3515 #endif
3516             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3517               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3518               col = in[j];
3519             }
3520           } else col = in[j];
3521           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3522         }
3523       }
3524     } else {
3525       if (!baij->donotstash) {
3526         if (roworiented) {
3527           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3528         } else {
3529           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3530         }
3531       }
3532     }
3533   }
3534 
3535   /* task normally handled by MatSetValuesBlocked() */
3536   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3537   PetscFunctionReturn(PETSC_SUCCESS);
3538 }
3539 
3540 /*@
3541   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3542 
3543   Collective
3544 
3545   Input Parameters:
3546 + comm - MPI communicator
3547 . bs   - the block size, only a block size of 1 is supported
3548 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3549 . n    - This value should be the same as the local size used in creating the
3550          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3551          calculated if `N` is given) For square matrices `n` is almost always `m`.
3552 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3553 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3554 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3555 . j    - column indices
3556 - a    - matrix values
3557 
3558   Output Parameter:
3559 . mat - the matrix
3560 
3561   Level: intermediate
3562 
3563   Notes:
3564   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3565   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3566   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3567 
3568   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3569   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3570   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3571   with column-major ordering within blocks.
3572 
3573   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3574 
3575 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3576           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3577 @*/
3578 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3579 {
3580   PetscFunctionBegin;
3581   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3582   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3583   PetscCall(MatCreate(comm, mat));
3584   PetscCall(MatSetSizes(*mat, m, n, M, N));
3585   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3586   PetscCall(MatSetBlockSize(*mat, bs));
3587   PetscCall(MatSetUp(*mat));
3588   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3589   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3590   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3591   PetscFunctionReturn(PETSC_SUCCESS);
3592 }
3593 
3594 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3595 {
3596   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3597   PetscInt    *indx;
3598   PetscScalar *values;
3599 
3600   PetscFunctionBegin;
3601   PetscCall(MatGetSize(inmat, &m, &N));
3602   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3603     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3604     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3605     PetscInt    *bindx, rmax = a->rmax, j;
3606     PetscMPIInt  rank, size;
3607 
3608     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3609     mbs = m / bs;
3610     Nbs = N / cbs;
3611     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3612     nbs = n / cbs;
3613 
3614     PetscCall(PetscMalloc1(rmax, &bindx));
3615     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3616 
3617     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3618     PetscCallMPI(MPI_Comm_rank(comm, &size));
3619     if (rank == size - 1) {
3620       /* Check sum(nbs) = Nbs */
3621       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3622     }
3623 
3624     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3625     for (i = 0; i < mbs; i++) {
3626       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3627       nnz = nnz / bs;
3628       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3629       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3630       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3631     }
3632     PetscCall(PetscFree(bindx));
3633 
3634     PetscCall(MatCreate(comm, outmat));
3635     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3636     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3637     PetscCall(MatSetType(*outmat, MATBAIJ));
3638     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3639     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3640     MatPreallocateEnd(dnz, onz);
3641     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3642   }
3643 
3644   /* numeric phase */
3645   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3646   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3647 
3648   for (i = 0; i < m; i++) {
3649     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3650     Ii = i + rstart;
3651     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3652     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3653   }
3654   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3655   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3656   PetscFunctionReturn(PETSC_SUCCESS);
3657 }
3658