/* /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 21604f62fddfd00a143407caac518db3de88a88a) */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
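
/*
  A worked example of the off-diagonal index mapping used above (made-up numbers): with bs = 2
  and garray = {3, 7}, a local index idxb[i] = 3 returned by MatGetRowMaxAbs() on B lies in
  local block column 3/2 = 1 with offset 3%2 = 1, so the corresponding global column is
  bs*garray[1] + 1 = 2*7 + 1 = 15.
*/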

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part B of the local
   storage of the matrix. This is done in a non-scalable way (unless
   PETSC_USE_CTABLE is defined), since the length of colmap equals the
   number of block columns in the global matrix.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
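
/*
  A minimal lookup sketch showing how colmap is consumed (it mirrors the use in
  MatSetValues_MPIBAIJ() below; gcol is a hypothetical global column index):

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIGetWithDefault(baij->colmap, gcol / bs + 1, 0, &lcol));
    lcol--;                             // 0 is the "not present" default, hence the stored +1
  #else
    lcol = baij->colmap[gcol / bs] - 1; // dense array over all global block columns
  #endif
    if (lcol >= 0) lcol += gcol % bs;   // local point (not block) column within B
*/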

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
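
/*
  Within each bs x bs block the scalars are stored column-major, which is why the entry at
  block-local position (ridx, cidx) of the block in slot _i is ap[bs2 * _i + bs * cidx + ridx].
  Worked example (made-up numbers): with bs = 2 (so bs2 = 4), entry (ridx, cidx) = (1, 0) of
  the block in slot _i = 3 sits at ap[4*3 + 2*0 + 1] = ap[13].
*/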

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
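
/*
  A minimal caller-side sketch of the routine above through the public interface, using global
  point (not block) indices; mat and the index/value data are made up:

    PetscInt    row = 4, cols[2] = {4, 5};
    PetscScalar vals[2] = {2.0, -1.0};
    PetscCall(MatSetValues(mat, 1, &row, 2, cols, vals, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));

  Rows owned by another process are stashed (the final branch above) and communicated during
  assembly.
*/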

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by passing additional stride information into
    MatSetValuesBlocked_SeqBAIJ_Inlined() so that the block copy at "Here a copy is required"
    below is not needed.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), v is already a contiguous block and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
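
/*
  A caller-side sketch for the blocked interface above (made-up data): im[] and in[] are block
  indices, and v supplies one bs x bs block per (row, column) pair, row-oriented by default:

    PetscInt    brow = 1, bcol = 2;    // block indices, i.e. point rows bs..2*bs-1 for bs = 2
    PetscScalar block[4] = {1.0, 2.0,  // bs = 2: the block, given row by row
                            3.0, 4.0};
    PetscCall(MatSetValuesBlocked(mat, 1, &brow, 1, &bcol, block, ADD_VALUES));
*/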

#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
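
/*
  HASH() is multiplicative hashing with the golden-ratio constant. Worked example (made-up
  numbers): for key = 100 and size = 97, tmp = 100 * 0.6180339887 = 61.80339887, whose
  fractional part is 0.80339887, so the slot is (PetscInt)(97 * 0.80339887) = 77. Collisions
  are resolved by the linear probing in the routines below.
*/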
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
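
/*
  A caller-side sketch (made-up indices): MatGetValues() on a MATMPIBAIJ matrix may only query
  rows owned by the calling process, as enforced above:

    PetscInt    row = 4, cols[2] = {0, 7}; // row must lie in [rstart, rend) of this process
    PetscScalar vals[2];
    PetscCall(MatGetValues(mat, 1, &row, 2, cols, vals));
*/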

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
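
/*
  For reference, the norms computed above are

    Frobenius: ||A||_F   = sqrt(sum_ij |a_ij|^2)
    1-norm:    ||A||_1   = max_j sum_i |a_ij|  (max column sum)
    inf-norm:  ||A||_inf = max_i sum_j |a_ij|  (max row sum)

  each finished with a single reduction across all processes.
*/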

/*
  Creates and populates the hash table. The table is created only once;
  if new entries need to be added to the matrix, the hash table has to
  be destroyed and recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
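
/*
  A usage sketch for the hash-table insertion path (illustrative; these are the public knobs
  that set ht_flag and ht_fact): the table is built at the end of the first final assembly,
  once the nonzero pattern is fixed, after which MatSetValues() and MatSetValuesBlocked() are
  redirected to the _HT variants above.

    PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.99)); // table size = factor * number of nonzero blocks
*/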

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* Determine whether any process has disassembled; if so, we must also
     disassemble ourselves, so that we can reassemble. If the nonzero
     structure of the submatrix B cannot change, then no process
     disassembled, and this step can be skipped. */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* If no new nonzero locations are allowed in the matrix, then the nonzero state only needs to be set the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
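
/*
  A caller-side sketch of the assembly cycle handled above (made-up insertions): a FLUSH
  assembly moves stashed values without finalizing and is required between an INSERT_VALUES
  phase and an ADD_VALUES phase; the FINAL assembly also builds the scatter and, if requested,
  the hash table:

    PetscCall(MatSetValues(mat, 1, &row, 1, &col, &v, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(mat, MAT_FLUSH_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FLUSH_ASSEMBLY));
    PetscCall(MatSetValues(mat, 1, &row, 1, &col, &v, ADD_VALUES));
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/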

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should instead use the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Every process has to participate in the MatView() below, since the graphics waits
       are synchronized across all processes that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb = B->i[i + 1], jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
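
/*
  A save/load sketch for the binary viewer above (file name made up):

    PetscViewer viewer;
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_WRITE, &viewer));
    PetscCall(MatView(mat, viewer));
    PetscCall(PetscViewerDestroy(&viewer));

  The file can be read back with MatLoad() into a matrix of type MATMPIBAIJ (opening the
  viewer with FILE_MODE_READ).
*/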

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
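
/*
  The multiply routines below follow the same pattern as MatMult_MPIBAIJ() above: with x_d the
  locally owned part of x and x_g the ghost values gathered through Mvctx,

    y = A*x_d + B*x_g

  where A is the diagonal block and B the off-diagonal block. The scatter is started before
  the local product and completed after it, overlapping communication with computation.
*/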

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1385 
1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1387 {
1388   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1389 
1390   PetscFunctionBegin;
1391   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1392   baij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
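
/*
   Usage sketch for the pair above (annotation, not part of the PETSc source): only
   locally owned rows may be requested, and each MatGetRow() must be matched by
   MatRestoreRow() before the next row is fetched (enforced by the getrowactive flag).

     PetscInt           rstart, rend, nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (PetscInt row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A, row, &nz, &cols, &vals));
       // ... inspect the nz entries (global column indices in cols) ...
       PetscCall(MatRestoreRow(A, row, &nz, &cols, &vals));
     }
*/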
1395 
1396 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1397 {
1398   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1399 
1400   PetscFunctionBegin;
1401   PetscCall(MatZeroEntries(l->A));
1402   PetscCall(MatZeroEntries(l->B));
1403   PetscFunctionReturn(PETSC_SUCCESS);
1404 }
1405 
1406 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1407 {
1408   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1409   Mat            A = a->A, B = a->B;
1410   PetscLogDouble isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size = (PetscReal)matin->rmap->bs;
1414 
1415   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1416 
1417   isend[0] = info->nz_used;
1418   isend[1] = info->nz_allocated;
1419   isend[2] = info->nz_unneeded;
1420   isend[3] = info->memory;
1421   isend[4] = info->mallocs;
1422 
1423   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1424 
1425   isend[0] += info->nz_used;
1426   isend[1] += info->nz_allocated;
1427   isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;
1429   isend[4] += info->mallocs;
1430 
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(PETSC_SUCCESS);
1458 }
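
/*
   Sketch of querying the statistics gathered above (A is an assumed, assembled
   MATMPIBAIJ matrix); MAT_LOCAL, MAT_GLOBAL_MAX, and MAT_GLOBAL_SUM select the
   reduction exactly as coded above.

     MatInfo info;
     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g, mallocs %g\n", info.nz_used, info.mallocs));
*/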
1459 
1460 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1461 {
1462   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1463 
1464   PetscFunctionBegin;
1465   switch (op) {
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1468   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1469   case MAT_KEEP_NONZERO_PATTERN:
1470   case MAT_NEW_NONZERO_LOCATION_ERR:
1471     MatCheckPreallocated(A, 1);
1472     PetscCall(MatSetOption(a->A, op, flg));
1473     PetscCall(MatSetOption(a->B, op, flg));
1474     break;
1475   case MAT_ROW_ORIENTED:
1476     MatCheckPreallocated(A, 1);
1477     a->roworiented = flg;
1478 
1479     PetscCall(MatSetOption(a->A, op, flg));
1480     PetscCall(MatSetOption(a->B, op, flg));
1481     break;
1482   case MAT_FORCE_DIAGONAL_ENTRIES:
1483   case MAT_SORTED_FULL:
1484   case MAT_SUBMAT_SINGLEIS:
1485     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1486     break;
1487   case MAT_IGNORE_OFF_PROC_ENTRIES:
1488     a->donotstash = flg;
1489     break;
1490   case MAT_USE_HASH_TABLE:
1491     a->ht_flag = flg;
1492     a->ht_fact = 1.39;
1493     break;
1494   case MAT_SYMMETRIC:
1495   case MAT_STRUCTURALLY_SYMMETRIC:
1496   case MAT_HERMITIAN:
1497   case MAT_SYMMETRY_ETERNAL:
1498   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1499   case MAT_SPD_ETERNAL:
1500     /* if the diagonal matrix is square it inherits some of the properties above */
1501     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1502     break;
1503   default:
1504     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1505   }
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
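
/*
   Sketch of the options most specific to this implementation (A is an assumed
   MATMPIBAIJ matrix): MAT_USE_HASH_TABLE switches assembly to the hash-table path
   (with the 1.39 load factor set above), and MAT_IGNORE_OFF_PROC_ENTRIES disables
   the off-process stash.

     PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
     PetscCall(MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE));
*/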
1508 
1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1512   Mat_SeqBAIJ *Aloc;
1513   Mat          B;
1514   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1515   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1516   MatScalar   *a;
1517 
1518   PetscFunctionBegin;
1519   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1520   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1521     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1522     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1523     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1524     /* Do not know preallocation information, but must set block size */
1525     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1526   } else {
1527     B = *matout;
1528   }
1529 
1530   /* copy over the A part */
1531   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1532   ai   = Aloc->i;
1533   aj   = Aloc->j;
1534   a    = Aloc->a;
1535   PetscCall(PetscMalloc1(bs, &rvals));
1536 
1537   for (i = 0; i < mbs; i++) {
1538     rvals[0] = bs * (baij->rstartbs + i);
1539     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1540     for (j = ai[i]; j < ai[i + 1]; j++) {
1541       col = (baij->cstartbs + aj[j]) * bs;
1542       for (k = 0; k < bs; k++) {
1543         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1544 
1545         col++;
1546         a += bs;
1547       }
1548     }
1549   }
1550   /* copy over the B part */
1551   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1552   ai   = Aloc->i;
1553   aj   = Aloc->j;
1554   a    = Aloc->a;
1555   for (i = 0; i < mbs; i++) {
1556     rvals[0] = bs * (baij->rstartbs + i);
1557     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1558     for (j = ai[i]; j < ai[i + 1]; j++) {
1559       col = baij->garray[aj[j]] * bs;
1560       for (k = 0; k < bs; k++) {
1561         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1562         col++;
1563         a += bs;
1564       }
1565     }
1566   }
1567   PetscCall(PetscFree(rvals));
1568   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1569   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1570 
1571   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1572   else PetscCall(MatHeaderMerge(A, &B));
1573   PetscFunctionReturn(PETSC_SUCCESS);
1574 }
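
/*
   Sketch: create the transpose once, then refresh it in place after A's numerical
   values change; MAT_REUSE_MATRIX is only valid while the nonzero state still
   matches (checked above via MatTransposeCheckNonzeroState_Private()).

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
     // ... change values of A without altering its nonzero pattern ...
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));
     PetscCall(MatDestroy(&At));
*/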
1575 
1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1577 {
1578   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1579   Mat          a = baij->A, b = baij->B;
1580   PetscInt     s1, s2, s3;
1581 
1582   PetscFunctionBegin;
1583   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1584   if (rr) {
1585     PetscCall(VecGetLocalSize(rr, &s1));
1586     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1587     /* Overlap communication with computation. */
1588     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1589   }
1590   if (ll) {
1591     PetscCall(VecGetLocalSize(ll, &s1));
1592     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1593     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1594   }
1595   /* scale the diagonal block */
1596   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1597 
1598   if (rr) {
1599     /* Do a scatter end and then right scale the off-diagonal block */
1600     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1601     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1602   }
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
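
/*
   Sketch: A <- diag(ll) A diag(rr); either vector may be NULL to skip that side.
   The vectors conform to the rows (ll) and columns (rr) of the assumed matrix A.

     Vec ll, rr;
     PetscCall(MatCreateVecs(A, &rr, &ll));
     PetscCall(VecSet(ll, 2.0));
     PetscCall(VecSet(rr, 0.5));
     PetscCall(MatDiagonalScale(A, ll, rr));
     PetscCall(VecDestroy(&ll));
     PetscCall(VecDestroy(&rr));
*/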
1605 
1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1607 {
1608   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1609   PetscInt    *lrows;
1610   PetscInt     r, len;
1611   PetscBool    cong;
1612 
1613   PetscFunctionBegin;
1614   /* get locally owned rows */
1615   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1616   /* fix right-hand side if needed */
1617   if (x && b) {
1618     const PetscScalar *xx;
1619     PetscScalar       *bb;
1620 
1621     PetscCall(VecGetArrayRead(x, &xx));
1622     PetscCall(VecGetArray(b, &bb));
1623     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1624     PetscCall(VecRestoreArrayRead(x, &xx));
1625     PetscCall(VecRestoreArray(b, &bb));
1626   }
1627 
1628   /* actually zap the local rows */
1629   /*
1630         Zero the required rows. If the "diagonal block" of the matrix is square
1631      and the user wishes to set the diagonal, we use separate code so that
1632      MatSetValues() is not called for each diagonal entry, which would allocate
1633      new memory and incur many mallocs, slowing things down.
1634   */
1635   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1637   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1638   PetscCall(MatHasCongruentLayouts(A, &cong));
1639   if ((diag != 0.0) && cong) {
1640     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1641   } else if (diag != 0.0) {
1642     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1643     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1644     for (r = 0; r < len; ++r) {
1645       const PetscInt row = lrows[r] + A->rmap->rstart;
1646       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1647     }
1648     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1649     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1650   } else {
1651     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1652   }
1653   PetscCall(PetscFree(lrows));
1654 
1655   /* only change matrix nonzero state if pattern was allowed to be changed */
1656   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1657     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1658     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1659   }
1660   PetscFunctionReturn(PETSC_SUCCESS);
1661 }
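
/*
   Sketch: zero two global rows and place 1.0 on their diagonal; when x and b are
   supplied, b is adjusted so the zeroed rows keep the solution values already in x.
   A process may list rows it does not own; they are routed to the owner above.

     const PetscInt rows[] = {0, 5}; // hypothetical global row indices
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/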
1662 
1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1664 {
1665   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1666   PetscMPIInt        n, p = 0;
1667   PetscInt           i, j, k, r, len = 0, row, col, count;
1668   PetscInt          *lrows, *owners = A->rmap->range;
1669   PetscSFNode       *rrows;
1670   PetscSF            sf;
1671   const PetscScalar *xx;
1672   PetscScalar       *bb, *mask;
1673   Vec                xmask, lmask;
1674   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1675   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1676   PetscScalar       *aa;
1677 
1678   PetscFunctionBegin;
1679   PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1680   /* Create SF where leaves are input rows and roots are owned rows */
1681   PetscCall(PetscMalloc1(n, &lrows));
1682   for (r = 0; r < n; ++r) lrows[r] = -1;
1683   PetscCall(PetscMalloc1(N, &rrows));
1684   for (r = 0; r < N; ++r) {
1685     const PetscInt idx = rows[r];
1686     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1687     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1688       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1689     }
1690     rrows[r].rank  = p;
1691     rrows[r].index = rows[r] - owners[p];
1692   }
1693   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1694   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1695   /* Collect flags for rows to be zeroed */
1696   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1697   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1698   PetscCall(PetscSFDestroy(&sf));
1699   /* Compress and put in row numbers */
1700   for (r = 0; r < n; ++r)
1701     if (lrows[r] >= 0) lrows[len++] = r;
1702   /* zero diagonal part of matrix */
1703   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1704   /* handle off-diagonal part of matrix */
1705   PetscCall(MatCreateVecs(A, &xmask, NULL));
1706   PetscCall(VecDuplicate(l->lvec, &lmask));
1707   PetscCall(VecGetArray(xmask, &bb));
1708   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1709   PetscCall(VecRestoreArray(xmask, &bb));
1710   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1711   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1712   PetscCall(VecDestroy(&xmask));
1713   if (x) {
1714     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1715     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1716     PetscCall(VecGetArrayRead(l->lvec, &xx));
1717     PetscCall(VecGetArray(b, &bb));
1718   }
1719   PetscCall(VecGetArray(lmask, &mask));
1720   /* remove zeroed rows of off-diagonal matrix */
1721   for (i = 0; i < len; ++i) {
1722     row   = lrows[i];
1723     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1724     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
1725     for (k = 0; k < count; ++k) {
1726       aa[0] = 0.0;
1727       aa += bs;
1728     }
1729   }
1730   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1731   for (i = 0; i < l->B->rmap->N; ++i) {
1732     row = i / bs;
1733     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1734       for (k = 0; k < bs; ++k) {
1735         col = bs * baij->j[j] + k;
1736         if (PetscAbsScalar(mask[col])) {
1737           aa = baij->a + j * bs2 + (i % bs) + bs * k;
1738           if (x) bb[i] -= aa[0] * xx[col];
1739           aa[0] = 0.0;
1740         }
1741       }
1742     }
1743   }
1744   if (x) {
1745     PetscCall(VecRestoreArray(b, &bb));
1746     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1747   }
1748   PetscCall(VecRestoreArray(lmask, &mask));
1749   PetscCall(VecDestroy(&lmask));
1750   PetscCall(PetscFree(lrows));
1751 
1752   /* only change matrix nonzero state if pattern was allowed to be changed */
1753   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1754     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1755     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1756   }
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1761 {
1762   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1763 
1764   PetscFunctionBegin;
1765   PetscCall(MatSetUnfactored(a->A));
1766   PetscFunctionReturn(PETSC_SUCCESS);
1767 }
1768 
1769 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1770 
1771 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1772 {
1773   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1774   Mat          a, b, c, d;
1775   PetscBool    flg;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A;
1779   b = matA->B;
1780   c = matB->A;
1781   d = matB->B;
1782 
1783   PetscCall(MatEqual(a, c, &flg));
1784   if (flg) PetscCall(MatEqual(b, d, &flg));
1785   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1786   PetscFunctionReturn(PETSC_SUCCESS);
1787 }
1788 
1789 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1790 {
1791   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1792   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1793 
1794   PetscFunctionBegin;
1795   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1796   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1797     PetscCall(MatCopy_Basic(A, B, str));
1798   } else {
1799     PetscCall(MatCopy(a->A, b->A, str));
1800     PetscCall(MatCopy(a->B, b->B, str));
1801   }
1802   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1803   PetscFunctionReturn(PETSC_SUCCESS);
1804 }
1805 
1806 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1807 {
1808   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1809   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1810   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1811 
1812   PetscFunctionBegin;
1813   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1814   PetscFunctionReturn(PETSC_SUCCESS);
1815 }
1816 
1817 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1818 {
1819   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1820   PetscBLASInt bnz, one                         = 1;
1821   Mat_SeqBAIJ *x, *y;
1822   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1823 
1824   PetscFunctionBegin;
1825   if (str == SAME_NONZERO_PATTERN) {
1826     PetscScalar alpha = a;
1827     x                 = (Mat_SeqBAIJ *)xx->A->data;
1828     y                 = (Mat_SeqBAIJ *)yy->A->data;
1829     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1830     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1831     x = (Mat_SeqBAIJ *)xx->B->data;
1832     y = (Mat_SeqBAIJ *)yy->B->data;
1833     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1834     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1835     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1836   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1837     PetscCall(MatAXPY_Basic(Y, a, X, str));
1838   } else {
1839     Mat       B;
1840     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1841     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1842     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1843     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1844     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1845     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1846     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1847     PetscCall(MatSetType(B, MATMPIBAIJ));
1848     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1849     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1850     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1851     /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1! */
1852     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1853     PetscCall(MatHeaderMerge(Y, &B));
1854     PetscCall(PetscFree(nnz_d));
1855     PetscCall(PetscFree(nnz_o));
1856   }
1857   PetscFunctionReturn(PETSC_SUCCESS);
1858 }
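
/*
   Sketch: Y <- a*X + Y, with the structure flag choosing the path above;
   SAME_NONZERO_PATTERN uses the direct BLAS axpy on the stored blocks, while
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation.

     PetscCall(MatAXPY(Y, 2.0, X, SAME_NONZERO_PATTERN));
     PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN));
*/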
1859 
1860 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1861 {
1862   PetscFunctionBegin;
1863   if (PetscDefined(USE_COMPLEX)) {
1864     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1865 
1866     PetscCall(MatConjugate_SeqBAIJ(a->A));
1867     PetscCall(MatConjugate_SeqBAIJ(a->B));
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1873 {
1874   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1875 
1876   PetscFunctionBegin;
1877   PetscCall(MatRealPart(a->A));
1878   PetscCall(MatRealPart(a->B));
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
1882 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1883 {
1884   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1885 
1886   PetscFunctionBegin;
1887   PetscCall(MatImaginaryPart(a->A));
1888   PetscCall(MatImaginaryPart(a->B));
1889   PetscFunctionReturn(PETSC_SUCCESS);
1890 }
1891 
1892 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1893 {
1894   IS       iscol_local;
1895   PetscInt csize;
1896 
1897   PetscFunctionBegin;
1898   PetscCall(ISGetLocalSize(iscol, &csize));
1899   if (call == MAT_REUSE_MATRIX) {
1900     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1901     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1902   } else {
1903     PetscCall(ISAllGather(iscol, &iscol_local));
1904   }
1905   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1906   if (call == MAT_INITIAL_MATRIX) {
1907     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1908     PetscCall(ISDestroy(&iscol_local));
1909   }
1910   PetscFunctionReturn(PETSC_SUCCESS);
1911 }
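
/*
   Usage sketch (nloc and first are hypothetical): each process lists the global
   rows and columns it wants. Since the compression above works on block indices,
   it is safest for BAIJ to select complete blocks, i.e. runs of bs consecutive
   indices aligned to the block size.

     IS  isrow, iscol;
     Mat S;
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, nloc, first, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, nloc, first, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &S));
     // ... after the values of A change (same pattern), refresh in place:
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &S));
*/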
1912 
1913 /*
1914   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
1915   copy, then the end result formed by concatenating the local matrices.
1916   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1917   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1918 */
1919 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1920 {
1921   PetscMPIInt  rank, size;
1922   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1923   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1924   Mat          M, Mreuse;
1925   MatScalar   *vwork, *aa;
1926   MPI_Comm     comm;
1927   IS           isrow_new, iscol_new;
1928   Mat_SeqBAIJ *aij;
1929 
1930   PetscFunctionBegin;
1931   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1932   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1933   PetscCallMPI(MPI_Comm_size(comm, &size));
1934   /* The compression and expansion should be avoided. It does not flag
1935      errors and may change the indices, hence it is bug-prone */
1936   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1937   if (isrow == iscol) {
1938     iscol_new = isrow_new;
1939     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1940   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1941 
1942   if (call == MAT_REUSE_MATRIX) {
1943     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1944     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1945     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1946   } else {
1947     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1948   }
1949   PetscCall(ISDestroy(&isrow_new));
1950   PetscCall(ISDestroy(&iscol_new));
1951   /*
1952       m - number of local rows
1953       n - number of columns (same on all processors)
1954       rstart - first row in new global matrix generated
1955   */
1956   PetscCall(MatGetBlockSize(mat, &bs));
1957   PetscCall(MatGetSize(Mreuse, &m, &n));
1958   m = m / bs;
1959   n = n / bs;
1960 
1961   if (call == MAT_INITIAL_MATRIX) {
1962     aij = (Mat_SeqBAIJ *)Mreuse->data;
1963     ii  = aij->i;
1964     jj  = aij->j;
1965 
1966     /*
1967         Determine the number of non-zeros in the diagonal and off-diagonal
1968         portions of the matrix in order to do correct preallocation
1969     */
1970 
1971     /* first get start and end of "diagonal" columns */
1972     if (csize == PETSC_DECIDE) {
1973       PetscCall(ISGetSize(isrow, &mglobal));
1974       if (mglobal == n * bs) { /* square matrix */
1975         nlocal = m;
1976       } else {
1977         nlocal = n / size + ((n % size) > rank);
1978       }
1979     } else {
1980       nlocal = csize / bs;
1981     }
1982     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1983     rstart = rend - nlocal;
1984     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1985 
1986     /* next, compute all the lengths */
1987     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1988     for (i = 0; i < m; i++) {
1989       jend = ii[i + 1] - ii[i];
1990       olen = 0;
1991       dlen = 0;
1992       for (j = 0; j < jend; j++) {
1993         if (*jj < rstart || *jj >= rend) olen++;
1994         else dlen++;
1995         jj++;
1996       }
1997       olens[i] = olen;
1998       dlens[i] = dlen;
1999     }
2000     PetscCall(MatCreate(comm, &M));
2001     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2002     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2003     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2004     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2005     PetscCall(PetscFree2(dlens, olens));
2006   } else {
2007     PetscInt ml, nl;
2008 
2009     M = *newmat;
2010     PetscCall(MatGetLocalSize(M, &ml, &nl));
2011     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2012     PetscCall(MatZeroEntries(M));
2013     /*
2014          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2015        rather than the slower MatSetValuesBlocked().
2016     */
2017     M->was_assembled = PETSC_TRUE;
2018     M->assembled     = PETSC_FALSE;
2019   }
2020   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2021   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2022   aij = (Mat_SeqBAIJ *)Mreuse->data;
2023   ii  = aij->i;
2024   jj  = aij->j;
2025   aa  = aij->a;
2026   for (i = 0; i < m; i++) {
2027     row   = rstart / bs + i;
2028     nz    = ii[i + 1] - ii[i];
2029     cwork = jj;
2030     jj    = PetscSafePointerPlusOffset(jj, nz);
2031     vwork = aa;
2032     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2033     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2034   }
2035 
2036   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2037   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2038   *newmat = M;
2039 
2040   /* save submatrix used in processor for next request */
2041   if (call == MAT_INITIAL_MATRIX) {
2042     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2043     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2049 {
2050   MPI_Comm        comm, pcomm;
2051   PetscInt        clocal_size, nrows;
2052   const PetscInt *rows;
2053   PetscMPIInt     size;
2054   IS              crowp, lcolp;
2055 
2056   PetscFunctionBegin;
2057   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2058   /* make a collective version of 'rowp' */
2059   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2060   if (pcomm == comm) {
2061     crowp = rowp;
2062   } else {
2063     PetscCall(ISGetSize(rowp, &nrows));
2064     PetscCall(ISGetIndices(rowp, &rows));
2065     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2066     PetscCall(ISRestoreIndices(rowp, &rows));
2067   }
2068   PetscCall(ISSetPermutation(crowp));
2069   /* make a local version of 'colp' */
2070   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2071   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2072   if (size == 1) {
2073     lcolp = colp;
2074   } else {
2075     PetscCall(ISAllGather(colp, &lcolp));
2076   }
2077   PetscCall(ISSetPermutation(lcolp));
2078   /* now we just get the submatrix */
2079   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2080   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2081   /* clean up */
2082   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2083   if (size > 1) PetscCall(ISDestroy(&lcolp));
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2088 {
2089   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2090   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2091 
2092   PetscFunctionBegin;
2093   if (nghosts) *nghosts = B->nbs;
2094   if (ghosts) *ghosts = baij->garray;
2095   PetscFunctionReturn(PETSC_SUCCESS);
2096 }
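
/*
   Sketch: the "ghosts" are the global block-column indices needed from other
   processes (garray); note these are block indices, not scalar columns, when bs > 1.

     PetscInt        nghost;
     const PetscInt *ghosts;
     PetscCall(MatGetGhosts(A, &nghost, &ghosts)); // array owned by A, do not free
*/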
2097 
2098 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2099 {
2100   Mat          B;
2101   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2102   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2103   Mat_SeqAIJ  *b;
2104   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2105   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2106   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2107 
2108   PetscFunctionBegin;
2109   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2110   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2111 
2112   /*   Tell every processor the number of nonzeros per row  */
2113   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2114   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2115   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2116   displs = recvcounts + size;
2117   for (i = 0; i < size; i++) {
2118     PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2119     PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2120   }
2121   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2122   /* Create the sequential matrix of the same type as the local block diagonal  */
2123   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2124   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2125   PetscCall(MatSetType(B, MATSEQAIJ));
2126   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2127   b = (Mat_SeqAIJ *)B->data;
2128 
2129   /*     Copy my part of matrix column indices over  */
2130   sendcount  = ad->nz + bd->nz;
2131   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2132   a_jsendbuf = ad->j;
2133   b_jsendbuf = bd->j;
2134   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2135   cnt        = 0;
2136   for (i = 0; i < n; i++) {
2137     /* put in lower diagonal portion */
2138     m = bd->i[i + 1] - bd->i[i];
2139     while (m > 0) {
2140       /* is it above diagonal (in bd (compressed) numbering) */
2141       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2142       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2143       m--;
2144     }
2145 
2146     /* put in diagonal portion */
2147     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2148 
2149     /* put in upper diagonal portion */
2150     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2151   }
2152   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2153 
2154   /*  Gather all column indices to all processors  */
2155   for (i = 0; i < size; i++) {
2156     recvcounts[i] = 0;
2157     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2158   }
2159   displs[0] = 0;
2160   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2161   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2162   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2163   /* set the b->ilen (length of each row) values */
2164   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2165   /* set the b->i indices */
2166   b->i[0] = 0;
2167   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2168   PetscCall(PetscFree(lens));
2169   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2170   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2171   PetscCall(PetscFree(recvcounts));
2172 
2173   PetscCall(MatPropagateSymmetryOptions(A, B));
2174   *newmat = B;
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2179 {
2180   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2181   Vec          bb1 = NULL;
2182 
2183   PetscFunctionBegin;
2184   if (flag == SOR_APPLY_UPPER) {
2185     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2186     PetscFunctionReturn(PETSC_SUCCESS);
2187   }
2188 
2189   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2190 
2191   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2192     if (flag & SOR_ZERO_INITIAL_GUESS) {
2193       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2194       its--;
2195     }
2196 
2197     while (its--) {
2198       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2199       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2200 
2201       /* update rhs: bb1 = bb - B*x */
2202       PetscCall(VecScale(mat->lvec, -1.0));
2203       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2204 
2205       /* local sweep */
2206       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2207     }
2208   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2209     if (flag & SOR_ZERO_INITIAL_GUESS) {
2210       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2211       its--;
2212     }
2213     while (its--) {
2214       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2215       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2216 
2217       /* update rhs: bb1 = bb - B*x */
2218       PetscCall(VecScale(mat->lvec, -1.0));
2219       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2220 
2221       /* local sweep */
2222       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2223     }
2224   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2225     if (flag & SOR_ZERO_INITIAL_GUESS) {
2226       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2227       its--;
2228     }
2229     while (its--) {
2230       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2231       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2232 
2233       /* update rhs: bb1 = bb - B*x */
2234       PetscCall(VecScale(mat->lvec, -1.0));
2235       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2236 
2237       /* local sweep */
2238       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2239     }
2240   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR variant not supported");
2241 
2242   PetscCall(VecDestroy(&bb1));
2243   PetscFunctionReturn(PETSC_SUCCESS);
2244 }
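
/*
   Sketch: this kernel is normally reached through PCSOR rather than called directly;
   the local sweeps above correspond to the options -pc_sor_local_symmetric,
   -pc_sor_local_forward, and -pc_sor_local_backward (b and x are assumed Vecs).

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSOR));
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/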
2245 
2246 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2247 {
2248   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2249   PetscInt     m, N, i, *garray = aij->garray;
2250   PetscInt     ib, jb, bs = A->rmap->bs;
2251   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2252   MatScalar   *a_val = a_aij->a;
2253   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2254   MatScalar   *b_val = b_aij->a;
2255   PetscReal   *work;
2256 
2257   PetscFunctionBegin;
2258   PetscCall(MatGetSize(A, &m, &N));
2259   PetscCall(PetscCalloc1(N, &work));
2260   if (type == NORM_2) {
2261     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2262       for (jb = 0; jb < bs; jb++) {
2263         for (ib = 0; ib < bs; ib++) {
2264           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2265           a_val++;
2266         }
2267       }
2268     }
2269     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2270       for (jb = 0; jb < bs; jb++) {
2271         for (ib = 0; ib < bs; ib++) {
2272           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2273           b_val++;
2274         }
2275       }
2276     }
2277   } else if (type == NORM_1) {
2278     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2279       for (jb = 0; jb < bs; jb++) {
2280         for (ib = 0; ib < bs; ib++) {
2281           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2282           a_val++;
2283         }
2284       }
2285     }
2286     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2287       for (jb = 0; jb < bs; jb++) {
2288         for (ib = 0; ib < bs; ib++) {
2289           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2290           b_val++;
2291         }
2292       }
2293     }
2294   } else if (type == NORM_INFINITY) {
2295     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2296       for (jb = 0; jb < bs; jb++) {
2297         for (ib = 0; ib < bs; ib++) {
2298           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2299           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2300           a_val++;
2301         }
2302       }
2303     }
2304     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2305       for (jb = 0; jb < bs; jb++) {
2306         for (ib = 0; ib < bs; ib++) {
2307           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2308           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2309           b_val++;
2310         }
2311       }
2312     }
2313   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2314     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2315       for (jb = 0; jb < bs; jb++) {
2316         for (ib = 0; ib < bs; ib++) {
2317           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2318           a_val++;
2319         }
2320       }
2321     }
2322     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2323       for (jb = 0; jb < bs; jb++) {
2324         for (ib = 0; ib < bs; ib++) {
2325           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2326           b_val++;
2327         }
2328       }
2329     }
2330   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2331     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2332       for (jb = 0; jb < bs; jb++) {
2333         for (ib = 0; ib < bs; ib++) {
2334           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2335           a_val++;
2336         }
2337       }
2338     }
2339     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2340       for (jb = 0; jb < bs; jb++) {
2341         for (ib = 0; ib < bs; ib++) {
2342           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2343           b_val++;
2344         }
2345       }
2346     }
2347   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2348   if (type == NORM_INFINITY) {
2349     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2350   } else {
2351     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2352   }
2353   PetscCall(PetscFree(work));
2354   if (type == NORM_2) {
2355     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2356   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2357     for (i = 0; i < N; i++) reductions[i] /= m;
2358   }
2359   PetscFunctionReturn(PETSC_SUCCESS);
2360 }
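
/*
   Sketch: the public entry points that reach this kernel include MatGetColumnNorms();
   the reductions array must have room for all N global columns on every process.

     PetscInt   N;
     PetscReal *norms;
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(PetscMalloc1(N, &norms));
     PetscCall(MatGetColumnNorms(A, NORM_2, norms));
     PetscCall(PetscFree(norms));
*/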
2361 
2362 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2363 {
2364   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2365 
2366   PetscFunctionBegin;
2367   PetscCall(MatInvertBlockDiagonal(a->A, values));
2368   A->factorerrortype             = a->A->factorerrortype;
2369   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2370   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2371   PetscFunctionReturn(PETSC_SUCCESS);
2372 }
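
/*
   Sketch: values receives the inverses of the diagonal bs*bs blocks, one per local
   block row, stored contiguously; the array is owned by the matrix and must not be
   freed by the caller.

     const PetscScalar *idiag;
     PetscCall(MatInvertBlockDiagonal(A, &idiag));
*/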
2373 
2374 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2375 {
2376   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2377   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2378 
2379   PetscFunctionBegin;
2380   if (!Y->preallocated) {
2381     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2382   } else if (!aij->nz) {
2383     PetscInt nonew = aij->nonew;
2384     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2385     aij->nonew = nonew;
2386   }
2387   PetscCall(MatShift_Basic(Y, a));
2388   PetscFunctionReturn(PETSC_SUCCESS);
2389 }
2390 
2391 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2392 {
2393   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2394 
2395   PetscFunctionBegin;
2396   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2397   PetscCall(MatMissingDiagonal(a->A, missing, d));
2398   if (d) {
2399     PetscInt rstart;
2400     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2401     *d += rstart / A->rmap->bs;
2402   }
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2407 {
2408   PetscFunctionBegin;
2409   *a = ((Mat_MPIBAIJ *)A->data)->A;
2410   PetscFunctionReturn(PETSC_SUCCESS);
2411 }
2412 
2413 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2414 {
2415   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2416 
2417   PetscFunctionBegin;
2418   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2419   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2420   PetscFunctionReturn(PETSC_SUCCESS);
2421 }
2422 
2423 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2424                                        MatGetRow_MPIBAIJ,
2425                                        MatRestoreRow_MPIBAIJ,
2426                                        MatMult_MPIBAIJ,
2427                                        /* 4*/ MatMultAdd_MPIBAIJ,
2428                                        MatMultTranspose_MPIBAIJ,
2429                                        MatMultTransposeAdd_MPIBAIJ,
2430                                        NULL,
2431                                        NULL,
2432                                        NULL,
2433                                        /*10*/ NULL,
2434                                        NULL,
2435                                        NULL,
2436                                        MatSOR_MPIBAIJ,
2437                                        MatTranspose_MPIBAIJ,
2438                                        /*15*/ MatGetInfo_MPIBAIJ,
2439                                        MatEqual_MPIBAIJ,
2440                                        MatGetDiagonal_MPIBAIJ,
2441                                        MatDiagonalScale_MPIBAIJ,
2442                                        MatNorm_MPIBAIJ,
2443                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2444                                        MatAssemblyEnd_MPIBAIJ,
2445                                        MatSetOption_MPIBAIJ,
2446                                        MatZeroEntries_MPIBAIJ,
2447                                        /*24*/ MatZeroRows_MPIBAIJ,
2448                                        NULL,
2449                                        NULL,
2450                                        NULL,
2451                                        NULL,
2452                                        /*29*/ MatSetUp_MPI_Hash,
2453                                        NULL,
2454                                        NULL,
2455                                        MatGetDiagonalBlock_MPIBAIJ,
2456                                        NULL,
2457                                        /*34*/ MatDuplicate_MPIBAIJ,
2458                                        NULL,
2459                                        NULL,
2460                                        NULL,
2461                                        NULL,
2462                                        /*39*/ MatAXPY_MPIBAIJ,
2463                                        MatCreateSubMatrices_MPIBAIJ,
2464                                        MatIncreaseOverlap_MPIBAIJ,
2465                                        MatGetValues_MPIBAIJ,
2466                                        MatCopy_MPIBAIJ,
2467                                        /*44*/ NULL,
2468                                        MatScale_MPIBAIJ,
2469                                        MatShift_MPIBAIJ,
2470                                        NULL,
2471                                        MatZeroRowsColumns_MPIBAIJ,
2472                                        /*49*/ NULL,
2473                                        NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        NULL,
2477                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2478                                        NULL,
2479                                        MatSetUnfactored_MPIBAIJ,
2480                                        MatPermute_MPIBAIJ,
2481                                        MatSetValuesBlocked_MPIBAIJ,
2482                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2483                                        MatDestroy_MPIBAIJ,
2484                                        MatView_MPIBAIJ,
2485                                        NULL,
2486                                        NULL,
2487                                        /*64*/ NULL,
2488                                        NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2493                                        NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        /*74*/ NULL,
2498                                        MatFDColoringApply_BAIJ,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        /*79*/ NULL,
2503                                        NULL,
2504                                        NULL,
2505                                        NULL,
2506                                        MatLoad_MPIBAIJ,
2507                                        /*84*/ NULL,
2508                                        NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        /*89*/ NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        /*94*/ NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        /*99*/ NULL,
2523                                        NULL,
2524                                        NULL,
2525                                        MatConjugate_MPIBAIJ,
2526                                        NULL,
2527                                        /*104*/ NULL,
2528                                        MatRealPart_MPIBAIJ,
2529                                        MatImaginaryPart_MPIBAIJ,
2530                                        NULL,
2531                                        NULL,
2532                                        /*109*/ NULL,
2533                                        NULL,
2534                                        NULL,
2535                                        NULL,
2536                                        MatMissingDiagonal_MPIBAIJ,
2537                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2538                                        NULL,
2539                                        MatGetGhosts_MPIBAIJ,
2540                                        NULL,
2541                                        NULL,
2542                                        /*119*/ NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        MatGetMultiProcBlock_MPIBAIJ,
2547                                        /*124*/ NULL,
2548                                        MatGetColumnReductions_MPIBAIJ,
2549                                        MatInvertBlockDiagonal_MPIBAIJ,
2550                                        NULL,
2551                                        NULL,
2552                                        /*129*/ NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        /*134*/ NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        /*139*/ MatSetBlockSizes_Default,
2563                                        NULL,
2564                                        NULL,
2565                                        MatFDColoringSetUp_MPIXAIJ,
2566                                        NULL,
2567                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2568                                        NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        NULL,
2573                                        /*150*/ NULL,
2574                                        MatEliminateZeros_MPIBAIJ,
2575                                        MatGetRowSumAbs_MPIBAIJ,
2576                                        NULL,
2577                                        NULL,
2578                                        /*155*/ NULL,
2579                                        MatCopyHashToXAIJ_MPI_Hash};
2580 
2581 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2582 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2583 
2584 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2585 {
2586   PetscInt        m, rstart, cstart, cend;
2587   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2588   const PetscInt *JJ          = NULL;
2589   PetscScalar    *values      = NULL;
2590   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2591   PetscBool       nooffprocentries;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2595   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2596   PetscCall(PetscLayoutSetUp(B->rmap));
2597   PetscCall(PetscLayoutSetUp(B->cmap));
2598   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2599   m      = B->rmap->n / bs;
2600   rstart = B->rmap->rstart / bs;
2601   cstart = B->cmap->rstart / bs;
2602   cend   = B->cmap->rend / bs;
2603 
2604   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2605   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2606   for (i = 0; i < m; i++) {
2607     nz = ii[i + 1] - ii[i];
2608     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2609     nz_max = PetscMax(nz_max, nz);
2610     dlen   = 0;
2611     olen   = 0;
2612     JJ     = jj + ii[i];
2613     for (j = 0; j < nz; j++) {
2614       if (*JJ < cstart || *JJ >= cend) olen++;
2615       else dlen++;
2616       JJ++;
2617     }
2618     d_nnz[i] = dlen;
2619     o_nnz[i] = olen;
2620   }
2621   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2622   PetscCall(PetscFree2(d_nnz, o_nnz));
2623 
2624   values = (PetscScalar *)V;
2625   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2626   for (i = 0; i < m; i++) {
2627     PetscInt        row   = i + rstart;
2628     PetscInt        ncols = ii[i + 1] - ii[i];
2629     const PetscInt *icols = jj + ii[i];
2630     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2631       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2632       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2633     } else { /* block ordering does not match so we can only insert one block at a time. */
2634       PetscInt j;
2635       for (j = 0; j < ncols; j++) {
2636         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2637         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2638       }
2639     }
2640   }
2641 
2642   if (!V) PetscCall(PetscFree(values));
2643   nooffprocentries    = B->nooffprocentries;
2644   B->nooffprocentries = PETSC_TRUE;
2645   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2646   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2647   B->nooffprocentries = nooffprocentries;
2648 
2649   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652 
2653 /*@C
2654   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2655 
2656   Collective
2657 
2658   Input Parameters:
2659 + B  - the matrix
2660 . bs - the block size
2661 . i  - the indices into `j` for the start of each local row (starts with zero)
2662 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2663 - v  - optional values in the matrix, use `NULL` if not provided
2664 
2665   Level: advanced
2666 
2667   Notes:
2668   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2669   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2670   called this routine.
2671 
2672   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2673   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2674   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2675   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2676   block column and the second index is over columns within a block.
2677 
2678   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2679 
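  Example Usage:
  A minimal sketch (the block size, sizes, and block layout below are illustrative
  assumptions, not taken from a PETSc example); each rank passes two local block rows,
  with sorted global block column indices per row:
.vb
  Mat         B;
  PetscInt    bs     = 2;
  PetscInt    ii[]   = {0, 1, 3}; /* block row 0 holds 1 block, block row 1 holds 2 */
  PetscInt    jj[]   = {0, 0, 1}; /* global block column of each block, sorted per row */
  PetscScalar vv[12] = {0};       /* 3 blocks times bs*bs values each; zeros for brevity */

  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, vv));
.ve
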
2680 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2681 @*/
2682 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2683 {
2684   PetscFunctionBegin;
2685   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2686   PetscValidType(B, 1);
2687   PetscValidLogicalCollectiveInt(B, bs, 2);
2688   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2689   PetscFunctionReturn(PETSC_SUCCESS);
2690 }
2691 
2692 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2693 {
2694   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2695   PetscInt     i;
2696   PetscMPIInt  size;
2697 
2698   PetscFunctionBegin;
2699   if (B->hash_active) {
2700     B->ops[0]      = b->cops;
2701     B->hash_active = PETSC_FALSE;
2702   }
2703   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2704   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2705   PetscCall(PetscLayoutSetUp(B->rmap));
2706   PetscCall(PetscLayoutSetUp(B->cmap));
2707   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2708 
2709   if (d_nnz) {
2710     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2711   }
2712   if (o_nnz) {
2713     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2714   }
2715 
2716   b->bs2 = bs * bs;
2717   b->mbs = B->rmap->n / bs;
2718   b->nbs = B->cmap->n / bs;
2719   b->Mbs = B->rmap->N / bs;
2720   b->Nbs = B->cmap->N / bs;
2721 
2722   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2723   b->rstartbs = B->rmap->rstart / bs;
2724   b->rendbs   = B->rmap->rend / bs;
2725   b->cstartbs = B->cmap->rstart / bs;
2726   b->cendbs   = B->cmap->rend / bs;
2727 
2728 #if defined(PETSC_USE_CTABLE)
2729   PetscCall(PetscHMapIDestroy(&b->colmap));
2730 #else
2731   PetscCall(PetscFree(b->colmap));
2732 #endif
2733   PetscCall(PetscFree(b->garray));
2734   PetscCall(VecDestroy(&b->lvec));
2735   PetscCall(VecScatterDestroy(&b->Mvctx));
2736 
2737   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2738 
2739   MatSeqXAIJGetOptions_Private(b->B);
2740   PetscCall(MatDestroy(&b->B));
2741   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2742   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2743   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2744   MatSeqXAIJRestoreOptions_Private(b->B);
2745 
2746   MatSeqXAIJGetOptions_Private(b->A);
2747   PetscCall(MatDestroy(&b->A));
2748   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2749   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2750   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2751   MatSeqXAIJRestoreOptions_Private(b->A);
2752 
2753   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2754   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2755   B->preallocated  = PETSC_TRUE;
2756   B->was_assembled = PETSC_FALSE;
2757   B->assembled     = PETSC_FALSE;
2758   PetscFunctionReturn(PETSC_SUCCESS);
2759 }
2760 
2761 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2762 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2763 
2764 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2765 {
2766   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2767   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2768   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2769   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2770 
2771   PetscFunctionBegin;
2772   PetscCall(PetscMalloc1(M + 1, &ii));
2773   ii[0] = 0;
2774   for (i = 0; i < M; i++) {
2775     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2776     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2777     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2778     /* remove one from the count when this row contains a diagonal entry, since MATMPIADJ does not store diagonals */
2779     for (j = id[i]; j < id[i + 1]; j++) {
2780       if (jd[j] == i) {
2781         ii[i + 1]--;
2782         break;
2783       }
2784     }
2785   }
2786   PetscCall(PetscMalloc1(ii[M], &jj));
2787   cnt = 0;
2788   for (i = 0; i < M; i++) {
2789     for (j = io[i]; j < io[i + 1]; j++) {
2790       if (garray[jo[j]] > rstart) break;
2791       jj[cnt++] = garray[jo[j]];
2792     }
2793     for (k = id[i]; k < id[i + 1]; k++) {
2794       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2795     }
2796     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2797   }
2798   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2799   PetscFunctionReturn(PETSC_SUCCESS);
2800 }
2801 
2802 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2803 
2804 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2805 
2806 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2807 {
2808   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2809   Mat_MPIAIJ  *b;
2810   Mat          B;
2811 
2812   PetscFunctionBegin;
2813   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2814 
2815   if (reuse == MAT_REUSE_MATRIX) {
2816     B = *newmat;
2817   } else {
2818     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2819     PetscCall(MatSetType(B, MATMPIAIJ));
2820     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2821     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2822     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2823     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2824   }
2825   b = (Mat_MPIAIJ *)B->data;
2826 
2827   if (reuse == MAT_REUSE_MATRIX) {
2828     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2829     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2830   } else {
2831     PetscInt   *garray = a->garray;
2832     Mat_SeqAIJ *bB;
2833     PetscInt    bs, nnz;
2834     PetscCall(MatDestroy(&b->A));
2835     PetscCall(MatDestroy(&b->B));
2836     /* just clear out the data structure */
2837     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2838     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2839     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2840 
2841     /* Global numbering for b->B columns */
2842     bB  = (Mat_SeqAIJ *)b->B->data;
2843     bs  = A->rmap->bs;
2844     nnz = bB->i[A->rmap->n];
2845     for (PetscInt k = 0; k < nnz; k++) {
2846       PetscInt bj = bB->j[k] / bs;
2847       PetscInt br = bB->j[k] % bs;
2848       bB->j[k]    = garray[bj] * bs + br;
2849     }
2850   }
2851   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2852   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2853 
2854   if (reuse == MAT_INPLACE_MATRIX) {
2855     PetscCall(MatHeaderReplace(A, &B));
2856   } else {
2857     *newmat = B;
2858   }
2859   PetscFunctionReturn(PETSC_SUCCESS);
2860 }
2861 
2862 /*MC
2863    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2864 
2865    Options Database Keys:
2866 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2867 . -mat_block_size <bs> - set the blocksize used to store the matrix
2868 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2869 - -mat_use_hash_table <fact> - set hash table factor
2870 
2871    Level: beginner
2872 
2873    Note:
2874     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2875     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
2876 
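   Example Usage:
   A minimal sketch (sizes are illustrative assumptions) that selects this type
   programmatically and enables the structure-only mode described in the note above:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATMPIBAIJ));
   PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); /* values given to MatSetValues() are ignored */
.ve
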
2877 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2878 M*/
2879 
2880 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2881 
2882 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2883 {
2884   Mat_MPIBAIJ *b;
2885   PetscBool    flg = PETSC_FALSE;
2886 
2887   PetscFunctionBegin;
2888   PetscCall(PetscNew(&b));
2889   B->data      = (void *)b;
2890   B->ops[0]    = MatOps_Values;
2891   B->assembled = PETSC_FALSE;
2892 
2893   B->insertmode = NOT_SET_VALUES;
2894   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2895   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2896 
2897   /* build local table of row and column ownerships */
2898   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2899 
2900   /* build cache for off array entries formed */
2901   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2902 
2903   b->donotstash  = PETSC_FALSE;
2904   b->colmap      = NULL;
2905   b->garray      = NULL;
2906   b->roworiented = PETSC_TRUE;
2907 
2908   /* stuff used in block assembly */
2909   b->barray = NULL;
2910 
2911   /* stuff used for matrix vector multiply */
2912   b->lvec  = NULL;
2913   b->Mvctx = NULL;
2914 
2915   /* stuff for MatGetRow() */
2916   b->rowindices   = NULL;
2917   b->rowvalues    = NULL;
2918   b->getrowactive = PETSC_FALSE;
2919 
2920   /* hash table stuff */
2921   b->ht           = NULL;
2922   b->hd           = NULL;
2923   b->ht_size      = 0;
2924   b->ht_flag      = PETSC_FALSE;
2925   b->ht_fact      = 0;
2926   b->ht_total_ct  = 0;
2927   b->ht_insert_ct = 0;
2928 
2929   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2930   b->ijonly = PETSC_FALSE;
2931 
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2935 #if defined(PETSC_HAVE_HYPRE)
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2937 #endif
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2943   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2945   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2946 
2947   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2948   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2949   if (flg) {
2950     PetscReal fact = 1.39;
2951     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2952     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2953     if (fact <= 1.0) fact = 1.39;
2954     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2955     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2956   }
2957   PetscOptionsEnd();
2958   PetscFunctionReturn(PETSC_SUCCESS);
2959 }
2960 
2961 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2962 /*MC
2963    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2964 
2965    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2966    and `MATMPIBAIJ` otherwise.
2967 
2968    Options Database Keys:
2969 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2970 
2971   Level: beginner
2972 
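   Example Usage:
   A minimal sketch (sizes are illustrative assumptions); on one rank the matrix becomes
   `MATSEQBAIJ`, on several ranks `MATMPIBAIJ`:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 12, 12));
   PetscCall(MatSetType(A, MATBAIJ));
   PetscCall(MatSetFromOptions(A));
.ve
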
2973 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2974 M*/
2975 
2976 /*@
2977   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2978   (block compressed row).
2979 
2980   Collective
2981 
2982   Input Parameters:
2983 + B     - the matrix
2984 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2985           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2986 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2987            submatrix  (same for all local rows)
2988 . d_nnz - array containing the number of block nonzeros in the various block rows
2989            of the diagonal portion of the local submatrix (possibly different for each block
2990            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2991            set it even if it is zero.
2992 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2993            submatrix (same for all local rows).
2994 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2995            off-diagonal portion of the local submatrix (possibly different for
2996            each block row) or `NULL`.
2997 
2998    If the *_nnz parameter is given then the *_nz parameter is ignored
2999 
3000   Options Database Keys:
3001 + -mat_block_size            - size of the blocks to use
3002 - -mat_use_hash_table <fact> - set hash table factor
3003 
3004   Level: intermediate
3005 
3006   Notes:
3007   For good matrix assembly performance
3008   the user should preallocate the matrix storage by setting the parameters
3009   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3010   performance can be increased by more than a factor of 50.
3011 
3012   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3013   then it must be used on all processors that share the object for that argument.
3014 
3015   Storage Information:
3016   For a square global matrix we define each processor's diagonal portion
3017   to be its local rows and the corresponding columns (a square submatrix);
3018   each processor's off-diagonal portion encompasses the remainder of the
3019   local matrix (a rectangular submatrix).
3020 
3021   The user can specify preallocated storage for the diagonal part of
3022   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3023   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3024   memory allocation.  Likewise, specify preallocated storage for the
3025   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3026 
3027   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3028   the figure below we depict these three local rows and all columns (0-11).
3029 
3030 .vb
3031            0 1 2 3 4 5 6 7 8 9 10 11
3032           --------------------------
3033    row 3  |o o o d d d o o o o  o  o
3034    row 4  |o o o d d d o o o o  o  o
3035    row 5  |o o o d d d o o o o  o  o
3036           --------------------------
3037 .ve
3038 
3039   Thus, any entries in the d locations are stored in the d (diagonal)
3040   submatrix, and any entries in the o locations are stored in the
3041   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3042   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3043 
3044   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3045   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3046   In general, for PDE problems in which most nonzeros are near the diagonal,
3047   one expects `d_nz` >> `o_nz`.
3048 
3049   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3050   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3051   You can also run with the option `-info` and look for messages with the string
3052   malloc in them to see if additional memory allocation was needed.
3053 
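  Example Usage:
  A minimal sketch (the block size and the counts are illustrative assumptions) that
  preallocates 3 diagonal and 1 off-diagonal block per block row:
.vb
  Mat      A;
  PetscInt bs = 2, mlocal = 10; /* 10 local scalar rows = 5 local block rows */

  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, mlocal, mlocal, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(A, MATMPIBAIJ));
  PetscCall(MatMPIBAIJSetPreallocation(A, bs, 3, NULL, 1, NULL));
.ve
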
3054 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3055 @*/
3056 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3057 {
3058   PetscFunctionBegin;
3059   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3060   PetscValidType(B, 1);
3061   PetscValidLogicalCollectiveInt(B, bs, 2);
3062   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3063   PetscFunctionReturn(PETSC_SUCCESS);
3064 }
3065 
3066 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3067 /*@
3068   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3069   (block compressed row).
3070 
3071   Collective
3072 
3073   Input Parameters:
3074 + comm  - MPI communicator
3075 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3076           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3077 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3078           This value should be the same as the local size used in creating the
3079           y vector for the matrix-vector product y = Ax.
3080 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3081           This value should be the same as the local size used in creating the
3082           x vector for the matrix-vector product y = Ax.
3083 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
3084 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
3085 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3086           submatrix  (same for all local rows)
3087 . d_nnz - array containing the number of nonzero blocks in the various block rows
3088           of the diagonal portion of the local submatrix (possibly different for each block
3089           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3090           and set it even if it is zero.
3091 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3092           submatrix (same for all local rows).
3093 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3094           off-diagonal portion of the local submatrix (possibly different for
3095           each block row) or NULL.
3096 
3097   Output Parameter:
3098 . A - the matrix
3099 
3100   Options Database Keys:
3101 + -mat_block_size            - size of the blocks to use
3102 - -mat_use_hash_table <fact> - set hash table factor
3103 
3104   Level: intermediate
3105 
3106   Notes:
3107   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3108   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3109   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3110 
3111   For good matrix assembly performance
3112   the user should preallocate the matrix storage by setting the parameters
3113   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3114   performance can be increased by more than a factor of 50.
3115 
3116   If the *_nnz parameter is given then the *_nz parameter is ignored
3117 
3118   A nonzero block is any block that has 1 or more nonzeros in it
3119 
3120   The user MUST specify either the local or global matrix dimensions
3121   (possibly both).
3122 
3123   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3124   then it must be used on all processors that share the object for that argument.
3125 
3126   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3127   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3128 
3129   Storage Information:
3130   For a square global matrix we define each processor's diagonal portion
3131   to be its local rows and the corresponding columns (a square submatrix);
3132   each processor's off-diagonal portion encompasses the remainder of the
3133   local matrix (a rectangular submatrix).
3134 
3135   The user can specify preallocated storage for the diagonal part of
3136   the local submatrix with either d_nz or d_nnz (not both).  Set
3137   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3138   memory allocation.  Likewise, specify preallocated storage for the
3139   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3140 
3141   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3142   the figure below we depict these three local rows and all columns (0-11).
3143 
3144 .vb
3145            0 1 2 3 4 5 6 7 8 9 10 11
3146           --------------------------
3147    row 3  |o o o d d d o o o o  o  o
3148    row 4  |o o o d d d o o o o  o  o
3149    row 5  |o o o d d d o o o o  o  o
3150           --------------------------
3151 .ve
3152 
3153   Thus, any entries in the d locations are stored in the d (diagonal)
3154   submatrix, and any entries in the o locations are stored in the
3155   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3156   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3157 
3158   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3159   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3160   In general, for PDE problems in which most nonzeros are near the diagonal,
3161   one expects `d_nz` >> `o_nz`.
3162 
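  Example Usage:
  A minimal sketch (the block size and the preallocation counts are illustrative
  assumptions) creating a 40x40 matrix of 2x2 blocks with room for 3 diagonal and
  2 off-diagonal blocks per block row:
.vb
  Mat A;
  PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 40, 40, 3, NULL, 2, NULL, &A));
.ve
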
3163 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3164           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3165 @*/
3166 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3167 {
3168   PetscMPIInt size;
3169 
3170   PetscFunctionBegin;
3171   PetscCall(MatCreate(comm, A));
3172   PetscCall(MatSetSizes(*A, m, n, M, N));
3173   PetscCallMPI(MPI_Comm_size(comm, &size));
3174   if (size > 1) {
3175     PetscCall(MatSetType(*A, MATMPIBAIJ));
3176     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3177   } else {
3178     PetscCall(MatSetType(*A, MATSEQBAIJ));
3179     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3180   }
3181   PetscFunctionReturn(PETSC_SUCCESS);
3182 }
3183 
3184 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3185 {
3186   Mat          mat;
3187   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3188   PetscInt     len = 0;
3189 
3190   PetscFunctionBegin;
3191   *newmat = NULL;
3192   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3193   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3194   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3195 
3196   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3197   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3198   if (matin->hash_active) {
3199     PetscCall(MatSetUp(mat));
3200   } else {
3201     mat->factortype   = matin->factortype;
3202     mat->preallocated = PETSC_TRUE;
3203     mat->assembled    = PETSC_TRUE;
3204     mat->insertmode   = NOT_SET_VALUES;
3205 
3206     a             = (Mat_MPIBAIJ *)mat->data;
3207     mat->rmap->bs = matin->rmap->bs;
3208     a->bs2        = oldmat->bs2;
3209     a->mbs        = oldmat->mbs;
3210     a->nbs        = oldmat->nbs;
3211     a->Mbs        = oldmat->Mbs;
3212     a->Nbs        = oldmat->Nbs;
3213 
3214     a->size         = oldmat->size;
3215     a->rank         = oldmat->rank;
3216     a->donotstash   = oldmat->donotstash;
3217     a->roworiented  = oldmat->roworiented;
3218     a->rowindices   = NULL;
3219     a->rowvalues    = NULL;
3220     a->getrowactive = PETSC_FALSE;
3221     a->barray       = NULL;
3222     a->rstartbs     = oldmat->rstartbs;
3223     a->rendbs       = oldmat->rendbs;
3224     a->cstartbs     = oldmat->cstartbs;
3225     a->cendbs       = oldmat->cendbs;
3226 
3227     /* hash table stuff */
3228     a->ht           = NULL;
3229     a->hd           = NULL;
3230     a->ht_size      = 0;
3231     a->ht_flag      = oldmat->ht_flag;
3232     a->ht_fact      = oldmat->ht_fact;
3233     a->ht_total_ct  = 0;
3234     a->ht_insert_ct = 0;
3235 
3236     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3237     if (oldmat->colmap) {
3238 #if defined(PETSC_USE_CTABLE)
3239       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3240 #else
3241       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3242       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3243 #endif
3244     } else a->colmap = NULL;
3245 
3246     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3247       PetscCall(PetscMalloc1(len, &a->garray));
3248       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3249     } else a->garray = NULL;
3250 
3251     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3252     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3253     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3254 
3255     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3256     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3257   }
3258   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3259   *newmat = mat;
3260   PetscFunctionReturn(PETSC_SUCCESS);
3261 }
3262 
3263 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3264 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3265 {
3266   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3267   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3268   PetscScalar *matvals;
3269 
3270   PetscFunctionBegin;
3271   PetscCall(PetscViewerSetUp(viewer));
3272 
3273   /* read in matrix header */
3274   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3275   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3276   M  = header[1];
3277   N  = header[2];
3278   nz = header[3];
3279   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3280   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3281   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3282 
3283   /* set block sizes from the viewer's .info file */
3284   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3285   /* set local sizes if not set already */
3286   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3287   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3288   /* set global sizes if not set already */
3289   if (mat->rmap->N < 0) mat->rmap->N = M;
3290   if (mat->cmap->N < 0) mat->cmap->N = N;
3291   PetscCall(PetscLayoutSetUp(mat->rmap));
3292   PetscCall(PetscLayoutSetUp(mat->cmap));
3293 
3294   /* check if the matrix sizes are correct */
3295   PetscCall(MatGetSize(mat, &rows, &cols));
3296   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3297   PetscCall(MatGetBlockSize(mat, &bs));
3298   PetscCall(MatGetLocalSize(mat, &m, &n));
3299   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3300   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3301   mbs = m / bs;
3302   nbs = n / bs;
3303 
3304   /* read in row lengths and build row indices */
3305   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3306   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3307   rowidxs[0] = 0;
3308   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3309   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3310   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3311 
3312   /* read in column indices and matrix values */
3313   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3314   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3315   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3316 
3317   {                /* preallocate matrix storage */
3318     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3319     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3320     PetscBool  sbaij, done;
3321     PetscInt  *d_nnz, *o_nnz;
3322 
3323     PetscCall(PetscBTCreate(nbs, &bt));
3324     PetscCall(PetscHSetICreate(&ht));
3325     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3326     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3327     for (i = 0; i < mbs; i++) {
3328       PetscCall(PetscBTMemzero(nbs, bt));
3329       PetscCall(PetscHSetIClear(ht));
3330       for (k = 0; k < bs; k++) {
3331         PetscInt row = bs * i + k;
3332         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3333           PetscInt col = colidxs[j];
3334           if (!sbaij || col >= row) {
3335             if (col >= cs && col < ce) {
3336               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3337             } else {
3338               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3339               if (done) o_nnz[i]++;
3340             }
3341           }
3342         }
3343       }
3344     }
3345     PetscCall(PetscBTDestroy(&bt));
3346     PetscCall(PetscHSetIDestroy(&ht));
3347     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3348     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3349     PetscCall(PetscFree2(d_nnz, o_nnz));
3350   }
3351 
3352   /* store matrix values */
3353   for (i = 0; i < m; i++) {
3354     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3355     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3356   }
3357 
3358   PetscCall(PetscFree(rowidxs));
3359   PetscCall(PetscFree2(colidxs, matvals));
3360   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3361   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3362   PetscFunctionReturn(PETSC_SUCCESS);
3363 }
3364 
3365 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3366 {
3367   PetscBool isbinary;
3368 
3369   PetscFunctionBegin;
3370   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3371   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3372   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3373   PetscFunctionReturn(PETSC_SUCCESS);
3374 }
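
/* Example usage (a hedged sketch, not taken from the PETSc test suite; "matrix.dat" is a
   placeholder for a file previously written with MatView() on a binary viewer):

     Mat         A;
     PetscViewer v;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &v));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIBAIJ));
     PetscCall(MatLoad(A, v));
     PetscCall(PetscViewerDestroy(&v));
*/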
3375 
3376 /*@
3377   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3378 
3379   Input Parameters:
3380 + mat  - the matrix
3381 - fact - factor
3382 
3383   Options Database Key:
3384 . -mat_use_hash_table <fact> - provide the factor
3385 
3386   Level: advanced
3387 
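  Example Usage:
  A sketch (the factor value 1.6 is an illustrative assumption) enabling hash table based
  assembly and then setting the factor:
.vb
  PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve
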
3388 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3389 @*/
3390 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3391 {
3392   PetscFunctionBegin;
3393   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3394   PetscFunctionReturn(PETSC_SUCCESS);
3395 }
3396 
3397 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3398 {
3399   Mat_MPIBAIJ *baij;
3400 
3401   PetscFunctionBegin;
3402   baij          = (Mat_MPIBAIJ *)mat->data;
3403   baij->ht_fact = fact;
3404   PetscFunctionReturn(PETSC_SUCCESS);
3405 }
3406 
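/* A hedged usage sketch for MatMPIBAIJGetSeqBAIJ() (assuming A is an assembled MATMPIBAIJ
   matrix): retrieve the diagonal part Ad, the off-diagonal part Ao, and the map from the
   local block columns of Ao to global block columns:

     Mat             Ad, Ao;
     const PetscInt *colmap;
     PetscCall(MatMPIBAIJGetSeqBAIJ(A, &Ad, &Ao, &colmap));
*/
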
3407 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3408 {
3409   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3410   PetscBool    flg;
3411 
3412   PetscFunctionBegin;
3413   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3414   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3415   if (Ad) *Ad = a->A;
3416   if (Ao) *Ao = a->B;
3417   if (colmap) *colmap = a->garray;
3418   PetscFunctionReturn(PETSC_SUCCESS);
3419 }
3420 
3421 /*
3422     Special version for direct calls from Fortran (to eliminate two function call overheads)
3423 */
3424 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3425   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3426 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3427   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3428 #endif
3429 
3430 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3431 /*@C
3432   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3433 
3434   Collective
3435 
3436   Input Parameters:
3437 + matin  - the matrix
3438 . min    - number of input rows
3439 . im     - input rows
3440 . nin    - number of input columns
3441 . in     - input columns
3442 . v      - numerical values input
3443 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3444 
3445   Level: advanced
3446 
3447   Developer Notes:
3448   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3449 
3450 .seealso: `Mat`, `MatSetValuesBlocked()`
3451 @*/
3452 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3453 {
3454   /* convert input arguments to C version */
3455   Mat        mat = *matin;
3456   PetscInt   m = *min, n = *nin;
3457   InsertMode addv = *addvin;
3458 
3459   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3460   const MatScalar *value;
3461   MatScalar       *barray      = baij->barray;
3462   PetscBool        roworiented = baij->roworiented;
3463   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3464   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3465   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3466 
3467   PetscFunctionBegin;
3468   /* tasks normally handled by MatSetValuesBlocked() */
3469   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3470   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3471   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3472   if (mat->assembled) {
3473     mat->was_assembled = PETSC_TRUE;
3474     mat->assembled     = PETSC_FALSE;
3475   }
3476   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3477 
3478   if (!barray) {
3479     PetscCall(PetscMalloc1(bs2, &barray));
3480     baij->barray = barray;
3481   }
3482 
3483   if (roworiented) stepval = (n - 1) * bs;
3484   else stepval = (m - 1) * bs;
3485 
3486   for (i = 0; i < m; i++) {
3487     if (im[i] < 0) continue;
3488     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3489     if (im[i] >= rstart && im[i] < rend) {
3490       row = im[i] - rstart;
3491       for (j = 0; j < n; j++) {
3492         /* If NumCol = 1 then a copy is not required */
3493         if ((roworiented) && (n == 1)) {
3494           barray = (MatScalar *)v + i * bs2;
3495         } else if ((!roworiented) && (m == 1)) {
3496           barray = (MatScalar *)v + j * bs2;
3497         } else { /* Here a copy is required */
3498           if (roworiented) {
3499             value = v + i * (stepval + bs) * bs + j * bs;
3500           } else {
3501             value = v + j * (stepval + bs) * bs + i * bs;
3502           }
3503           for (ii = 0; ii < bs; ii++, value += stepval) {
3504             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3505           }
3506           barray -= bs2;
3507         }
3508 
3509         if (in[j] >= cstart && in[j] < cend) {
3510           col = in[j] - cstart;
3511           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3512         } else if (in[j] < 0) {
3513           continue;
3514         } else {
3515           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3516           if (mat->was_assembled) {
3517             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3518 
3519 #if defined(PETSC_USE_DEBUG)
3520   #if defined(PETSC_USE_CTABLE)
3521             {
3522               PetscInt data;
3523               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3524               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3525             }
3526   #else
3527             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3528   #endif
3529 #endif
3530 #if defined(PETSC_USE_CTABLE)
3531             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3532             col = (col - 1) / bs;
3533 #else
3534             col = (baij->colmap[in[j]] - 1) / bs;
3535 #endif
3536             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3537               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3538               col = in[j];
3539             }
3540           } else col = in[j];
3541           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3542         }
3543       }
3544     } else {
3545       if (!baij->donotstash) {
3546         if (roworiented) {
3547           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3548         } else {
3549           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3550         }
3551       }
3552     }
3553   }
3554 
3555   /* task normally handled by MatSetValuesBlocked() */
3556   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3557   PetscFunctionReturn(PETSC_SUCCESS);
3558 }
3559 
3560 /*@
3561   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3562 
3563   Collective
3564 
3565   Input Parameters:
3566 + comm - MPI communicator
3567 . bs   - the block size, only a block size of 1 is supported
3568 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3569 . n    - number of local columns; this value should be the same as the local size used in creating the
3570          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
3571          calculated if `N` is given). For square matrices `n` is almost always `m`.
3572 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
3573 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
3574 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3575 . j    - column indices
3576 - a    - matrix values
3577 
3578   Output Parameter:
3579 . mat - the matrix
3580 
3581   Level: intermediate
3582 
3583   Notes:
3584   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3585   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3586   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3587 
3588   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3589   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3590   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3591   with column-major ordering within blocks.
3592 
3593   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3594 
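  Example Usage:
  A minimal sketch (the two local rows of CSR data are illustrative assumptions; note the
  restriction above that only a block size of 1 is supported):
.vb
  Mat         A;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 1};
  PetscScalar a[] = {2.0, -1.0, 2.0};

  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve
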
3595 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3596           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3597 @*/
3598 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3599 {
3600   PetscFunctionBegin;
3601   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3602   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3603   PetscCall(MatCreate(comm, mat));
3604   PetscCall(MatSetSizes(*mat, m, n, M, N));
3605   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3606   PetscCall(MatSetBlockSize(*mat, bs));
3607   PetscCall(MatSetUp(*mat));
3608   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3609   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3610   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3611   PetscFunctionReturn(PETSC_SUCCESS);
3612 }
3613 
3614 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3615 {
3616   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3617   PetscInt    *indx;
3618   PetscScalar *values;
3619 
3620   PetscFunctionBegin;
3621   PetscCall(MatGetSize(inmat, &m, &N));
3622   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3623     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3624     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3625     PetscInt    *bindx, rmax = a->rmax, j;
3626     PetscMPIInt  rank, size;
3627 
3628     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3629     mbs = m / bs;
3630     Nbs = N / cbs;
3631     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3632     nbs = n / cbs;
3633 
3634     PetscCall(PetscMalloc1(rmax, &bindx));
3635     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3636 
3637     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3638     PetscCallMPI(MPI_Comm_size(comm, &size));
3639     if (rank == size - 1) {
3640       /* Check sum(nbs) = Nbs */
3641       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3642     }
3643 
3644     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3645     for (i = 0; i < mbs; i++) {
3646       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3647       nnz = nnz / bs;
3648       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3649       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3650       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3651     }
3652     PetscCall(PetscFree(bindx));
3653 
3654     PetscCall(MatCreate(comm, outmat));
3655     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3656     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3657     PetscCall(MatSetType(*outmat, MATBAIJ));
3658     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3659     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3660     MatPreallocateEnd(dnz, onz);
3661     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3662   }
3663 
3664   /* numeric phase */
3665   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3666   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3667 
3668   for (i = 0; i < m; i++) {
3669     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3670     Ii = i + rstart;
3671     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3672     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3673   }
3674   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3675   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3676   PetscFunctionReturn(PETSC_SUCCESS);
3677 }
3678