xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ae51ba108530e1a8d2aeb8bc3771a24c2fcd801e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
14   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
15   PetscScalar       *va, *vv;
16   Vec                vB, vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
21   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
22 
23   PetscCall(VecGetArrayWrite(vA, &va));
24   if (idx) {
25     for (i = 0; i < m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
31   PetscCall(PetscMalloc1(m, &idxb));
32   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
33 
34   PetscCall(VecGetArrayWrite(v, &vv));
35   PetscCall(VecGetArrayRead(vB, &vb));
36   for (i = 0; i < m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
43     }
44   }
45   PetscCall(VecRestoreArrayWrite(vA, &vv));
46   PetscCall(VecRestoreArrayWrite(vA, &va));
47   PetscCall(VecRestoreArrayRead(vB, &vb));
48   PetscCall(PetscFree(idxb));
49   PetscCall(VecDestroy(&vA));
50   PetscCall(VecDestroy(&vB));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
57 
58   PetscFunctionBegin;
59   PetscCall(MatStoreValues(aij->A));
60   PetscCall(MatStoreValues(aij->B));
61   PetscFunctionReturn(PETSC_SUCCESS);
62 }
63 
64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
65 {
66   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
67 
68   PetscFunctionBegin;
69   PetscCall(MatRetrieveValues(aij->A));
70   PetscCall(MatRetrieveValues(aij->B));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 /*
75      Local utility routine that creates a mapping from the global column
76    number to the local number in the off-diagonal part of the local
77    storage of the matrix.  This is done in a non scalable way since the
78    length of colmap equals the global matrix length.
79 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  /* nbs = number of local off-diagonal block columns; bs = block size */
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Hash-table variant: memory proportional to the local off-diagonal block columns */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  /* Entries are stored 1-based (i*bs + 1) so a lookup result of 0 means "not present" */
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* Dense-array variant: one slot per global block column (the non-scalable path) */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  /* Entries are 1-based so the calloc'd 0 means "column not present locally" */
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
96 
/*
  MatSetValues_SeqBAIJ_A_Private - inserts (or adds, per addv) a single scalar at
  local point indices (row, col) of the diagonal block A. The block row/column are
  row/bs and col/bs; within the bs x bs block the entry sits at column-major offset
  bs*cidx + ridx. A short binary search narrows the range, then a linear scan finds
  the block column. If the entry is not present and preallocation permits (a->nonew),
  MatSeqXAIJReallocateAIJ grows the row and later entries are shifted up.
  orow/ocol are the original global indices, used only for error messages.
  Relies on the caller (MatSetValues_MPIBAIJ) having declared rp, ap, aa, ai, aj,
  aimax, ailen, bs, bs2, brow, bcol, ridx, cidx, low, high, t, _i, N, nrow, rmax, bap.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
  MatSetValues_SeqBAIJ_B_Private - identical in structure to
  MatSetValues_SeqBAIJ_A_Private but operates on the off-diagonal block B
  (arrays bj, bi, ba, bimax, bilen and flag b->nonew). See the comment on the
  A variant above for the search/insert algorithm.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/*
  MatSetValues_MPIBAIJ - point-wise (non-blocked) MatSetValues for MPIBAIJ.

  For each (im[i], in[j]) pair:
    - locally owned row + column in the diagonal range: insert into baij->A;
    - locally owned row + off-diagonal column: translate the global column via
      colmap/garray and insert into baij->B, disassembling the matrix first if
      a brand-new off-diagonal column appears after assembly;
    - off-process row: stash the values for communication at assembly time.
  v is interpreted row- or column-oriented according to baij->roworiented.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row owned by this process */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative columns are ignored */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly, columns of B are stored compacted; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap entries are 1-based; 0 means "not present" */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: expand B back to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the offset within the bs-wide block */
            }
          } else col = in[j]; /* before first assembly B uses global column numbering */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
262 
/*
  MatSetValuesBlocked_SeqBAIJ_Inlined - inserts (or adds) one bs x bs block of
  values v at block indices (row, col) of a SeqBAIJ matrix. v is read row- or
  column-oriented according to a->roworiented; storage inside each block is
  column-major. If the block column is absent and a->nonew allows it, the row is
  grown via MatSeqXAIJReallocateAIJ and later blocks are shifted up.
  orow/ocol are the caller's original global block indices, for error messages only.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrows the range, then a short linear scan finds col */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* existing block: write/accumulate into it */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major; blocks are column-major, so stride by bs */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v is column-major, matching the block layout: copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new entries */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++; /* the insertion point i stays valid; widen the scan bound */
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
339 
340 /*
341     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
342     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343 */
/*
  MatSetValuesBlocked_MPIBAIJ - blocked MatSetValues for MPIBAIJ: im[]/in[] are
  block indices and v holds m*n blocks of bs x bs values.

  Each block is routed to the diagonal part (baij->A), the off-diagonal part
  (baij->B, with column translation through colmap/garray), or the block stash
  for off-process rows. Unless the orientation lets us alias v directly, the
  block is first gathered into the reusable scratch buffer baij->barray.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block, cached on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* alias v directly */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the gathered block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block columns are ignored */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after assembly B's columns are compacted; translate through colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores 1-based point offsets; recover block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* brand-new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j]; /* before first assembly B uses global block columns */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
435 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional golden ratio;
   HASH(size,key,tmp) maps key into [0, size) from the fractional part of key*HASH_KEY.
   tmp is a caller-supplied PetscReal scratch variable (evaluated twice). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
  MatSetValues_MPIBAIJ_HT - point-wise MatSetValues via the precomputed hash table
  (see MatCreateHashTable_MPIBAIJ_Private). Only entries already present in the
  nonzero pattern can be set: the key (block row)*Nbs + (block col) + 1 is located
  by linear probing (wrapping once around the table) and the value is written at
  the column-major offset within the stored bs x bs block. Off-process rows are
  stashed. In debug builds, probe counts are accumulated for diagnostics.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table: key encodes the global (block row, block col) */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path: same probing, but counts probes for load-factor diagnostics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/*
  MatSetValuesBlocked_MPIBAIJ_HT - blocked MatSetValues via the precomputed hash
  table. im[]/in[] are block indices; each bs x bs block of v is written directly
  into the block address HD[idx] found by probing for key row*Nbs + col + 1.
  Only blocks already present in the nonzero pattern can be set; off-process block
  rows are stashed.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units but rend = mat->rmap->rend is in point
     units, while row = im[i] is a block index — confirm this should not be
     baij->rendbs; as written the ownership test mixes units */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of this block row's values (row-oriented case) */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path: identical probing but with probe counting */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* address of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* v rows are bs+stepval apart; block storage is column-major (stride bs) */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          /* column-oriented v matches the block layout: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row: stash */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
607 
/*
  MatGetValues_MPIBAIJ - retrieves entries at global point indices (idxm, idxn)
  into the row-major array v. Only rows owned by this process are supported;
  requesting an off-process row raises PETSC_ERR_SUP. Columns are served from
  the diagonal block when in the local column range, otherwise translated to the
  off-diagonal block via colmap/garray; absent off-diagonal entries return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block: local column offset */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global block column via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap entries are 1-based; -1 now means "not present" */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* block column absent locally (or stale colmap entry): the value is zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
645 
646 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
647 {
648   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
649   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
650   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
651   PetscReal    sum = 0.0;
652   MatScalar   *v;
653 
654   PetscFunctionBegin;
655   if (baij->size == 1) {
656     PetscCall(MatNorm(baij->A, type, nrm));
657   } else {
658     if (type == NORM_FROBENIUS) {
659       v  = amat->a;
660       nz = amat->nz * bs2;
661       for (i = 0; i < nz; i++) {
662         sum += PetscRealPart(PetscConj(*v) * (*v));
663         v++;
664       }
665       v  = bmat->a;
666       nz = bmat->nz * bs2;
667       for (i = 0; i < nz; i++) {
668         sum += PetscRealPart(PetscConj(*v) * (*v));
669         v++;
670       }
671       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
672       *nrm = PetscSqrtReal(*nrm);
673     } else if (type == NORM_1) { /* max column sum */
674       PetscReal *tmp, *tmp2;
675       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
676       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
677       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
678       v  = amat->a;
679       jj = amat->j;
680       for (i = 0; i < amat->nz; i++) {
681         for (j = 0; j < bs; j++) {
682           col = bs * (cstart + *jj) + j; /* column index */
683           for (row = 0; row < bs; row++) {
684             tmp[col] += PetscAbsScalar(*v);
685             v++;
686           }
687         }
688         jj++;
689       }
690       v  = bmat->a;
691       jj = bmat->j;
692       for (i = 0; i < bmat->nz; i++) {
693         for (j = 0; j < bs; j++) {
694           col = bs * garray[*jj] + j;
695           for (row = 0; row < bs; row++) {
696             tmp[col] += PetscAbsScalar(*v);
697             v++;
698           }
699         }
700         jj++;
701       }
702       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
703       *nrm = 0.0;
704       for (j = 0; j < mat->cmap->N; j++) {
705         if (tmp2[j] > *nrm) *nrm = tmp2[j];
706       }
707       PetscCall(PetscFree(tmp));
708       PetscCall(PetscFree(tmp2));
709     } else if (type == NORM_INFINITY) { /* max row sum */
710       PetscReal *sums;
711       PetscCall(PetscMalloc1(bs, &sums));
712       sum = 0.0;
713       for (j = 0; j < amat->mbs; j++) {
714         for (row = 0; row < bs; row++) sums[row] = 0.0;
715         v  = amat->a + bs2 * amat->i[j];
716         nz = amat->i[j + 1] - amat->i[j];
717         for (i = 0; i < nz; i++) {
718           for (col = 0; col < bs; col++) {
719             for (row = 0; row < bs; row++) {
720               sums[row] += PetscAbsScalar(*v);
721               v++;
722             }
723           }
724         }
725         v  = bmat->a + bs2 * bmat->i[j];
726         nz = bmat->i[j + 1] - bmat->i[j];
727         for (i = 0; i < nz; i++) {
728           for (col = 0; col < bs; col++) {
729             for (row = 0; row < bs; row++) {
730               sums[row] += PetscAbsScalar(*v);
731               v++;
732             }
733           }
734         }
735         for (row = 0; row < bs; row++) {
736           if (sums[row] > sum) sum = sums[row];
737         }
738       }
739       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
740       PetscCall(PetscFree(sums));
741     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
742   }
743   PetscFunctionReturn(PETSC_SUCCESS);
744 }
745 
746 /*
747   Creates the hash table, and sets the table
748   This table is created only once.
  If new entries need to be added to the matrix
750   then the hash table has to be destroyed and
751   recreated.
752 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* total collisions and longest probe sequence, for diagnostics only */
#endif

  PetscFunctionBegin;
  /* Table already built: it is created only once (see comment above) */
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  /* Table size is factor times the local number of nonzero blocks in A and B */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd; /* HD[slot] points at the bs2 values of the block whose key is in HT[slot] */
  HT = baij->ht; /* HT[slot] holds key = row*Nbs + col + 1; a zero entry marks an empty slot */

  /* Loop Over A: insert each diagonal-block entry with linear probing on collision */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1; /* +1 so that a valid key can never equal the empty marker 0 */
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B: same insertion; garray maps local off-diagonal block columns to global ones */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++; /* j counts occupied slots */
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
836 
/* Begin assembly: start shipping stashed off-process entries (point and block stashes) to their owners */
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  /* Nothing was stashed, so there is nothing to communicate */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  /* Point stash is partitioned by the scalar row layout, block stash by the block-row layout */
  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
853 
/* Finish assembly: drain the stashes into the local blocks, assemble A and B, and
   handle collective bookkeeping (disassembly detection, multiply setup, hash table) */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* Drain the point stash: receive messages and insert their entries locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each block carries bs2 values, hence the i*bs2 offset into val */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* NOTE(review): if ht_insert_ct is 0 this divides by zero in the logged value — confirm it is always nonzero when baij->ht is set */
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* First final assembly with hash-table option on: build the table and switch to the HT kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release MatGetRow() workspace; it is reallocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
962 
963 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
964 #include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers. For the INFO formats it prints
   per-rank statistics; otherwise it gathers the whole matrix onto rank 0 and views it there. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Per-rank sizes and nonzero counts, printed synchronized so output is ordered by rank */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy, all other ranks own none */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals = the bs global scalar row indices covered by block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one scalar column at a time; blocks are stored column by column here */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* the gathered matrix lives entirely in its local diagonal block on rank 0 */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1086 
1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* rank 0 writes the header with the global nonzero count */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  /* each of the bs scalar rows in a block row has the same length */
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices; emit each scalar row in ascending column order:
     off-diagonal columns left of the diagonal block, then the diagonal block, then the rest */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break; /* reached columns right of the diagonal block */
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same order as the column indices */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1163 
1164 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1165 {
1166   PetscBool iascii, isdraw, issocket, isbinary;
1167 
1168   PetscFunctionBegin;
1169   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1173   if (iascii || isdraw || issocket) {
1174     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1175   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1176   PetscFunctionReturn(PETSC_SUCCESS);
1177 }
1178 
/* Destroy the MPIBAIJ matrix: free all owned sub-objects and unregister composed methods */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* sequential diagonal and off-diagonal blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash map or a plain array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  /* communication objects for matrix-vector products */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  /* MatGetRow() workspace, value buffer, and hash table storage */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* remove every method composed onto this object at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1221 
1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1225   PetscInt     nt;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecGetLocalSize(xx, &nt));
1229   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1230   PetscCall(VecGetLocalSize(yy, &nt));
1231   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1232   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1233   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1234   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1235   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1236   PetscFunctionReturn(PETSC_SUCCESS);
1237 }
1238 
1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1240 {
1241   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1246   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1247   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1248   PetscFunctionReturn(PETSC_SUCCESS);
1249 }
1250 
1251 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1254 
1255   PetscFunctionBegin;
1256   /* do nondiagonal part */
1257   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1258   /* do local part */
1259   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1260   /* add partial results together */
1261   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1262   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscFunctionReturn(PETSC_SUCCESS);
1264 }
1265 
1266 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1269 
1270   PetscFunctionBegin;
1271   /* do nondiagonal part */
1272   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1273   /* do local part */
1274   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1275   /* add partial results together */
1276   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1277   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 /*
1282   This only works correctly for square matrices where the subblock A->A is the
1283    diagonal block
1284 */
1285 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1286 {
1287   PetscFunctionBegin;
1288   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1289   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1290   PetscFunctionReturn(PETSC_SUCCESS);
1291 }
1292 
1293 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1294 {
1295   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1296 
1297   PetscFunctionBegin;
1298   PetscCall(MatScale(a->A, aa));
1299   PetscCall(MatScale(a->B, aa));
1300   PetscFunctionReturn(PETSC_SUCCESS);
1301 }
1302 
/* Return one local row of the matrix, with column indices in globally increasing order,
   by merging the row of the diagonal block with the row of the off-diagonal block */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* block count of row i across A and B */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* only request from the sub-rows what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local block column of B -> global block column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1385 
1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1387 {
1388   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1389 
1390   PetscFunctionBegin;
1391   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1392   baij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1397 {
1398   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1399 
1400   PetscFunctionBegin;
1401   PetscCall(MatZeroEntries(l->A));
1402   PetscCall(MatZeroEntries(l->B));
1403   PetscFunctionReturn(PETSC_SUCCESS);
1404 }
1405 
1406 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1407 {
1408   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1409   Mat            A = a->A, B = a->B;
1410   PetscLogDouble isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size = (PetscReal)matin->rmap->bs;
1414 
1415   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1416 
1417   isend[0] = info->nz_used;
1418   isend[1] = info->nz_allocated;
1419   isend[2] = info->nz_unneeded;
1420   isend[3] = info->memory;
1421   isend[4] = info->mallocs;
1422 
1423   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1424 
1425   isend[0] += info->nz_used;
1426   isend[1] += info->nz_allocated;
1427   isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;
1429   isend[4] += info->mallocs;
1430 
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(PETSC_SUCCESS);
1458 }
1459 
/* Set a matrix option, forwarding to the sequential blocks where they implement it */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options handled entirely by the sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* tracked here as well as in the blocks (used when draining the block stash) */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* suppress stashing of off-process entries in MatSetValues() */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* enable hash-table-based MatSetValues() after the next final assembly */
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1507 
/*
  MatTranspose_MPIBAIJ - Forms the transpose of a parallel BAIJ matrix by
  walking the local diagonal (A) and off-diagonal (B) blocks row by row and
  inserting each bs x bs block, transposed, into the result with MatSetValues.

  reuse == MAT_INITIAL_MATRIX or MAT_INPLACE_MATRIX creates a new matrix B
  (with only the block size known, no preallocation); MAT_REUSE_MATRIX fills
  the previously created *matout.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* result has the row/column layout of A swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global point rows of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global point column of the first column of this block */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one column of the block as a row of B, i.e. transposed */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* off-diagonal block columns are compressed; garray maps back to global */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX, fold the new matrix back into A's header */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1574 
/*
  MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr); either
  vector may be NULL to skip that side. The right-scaling of the off-diagonal
  block needs ghost values of rr, so the scatter is started first and completed
  only after the other (purely local) scalings, overlapping communication with
  computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scaling uses only locally owned rows, so b can be scaled now */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1604 
/*
  MatZeroRows_MPIBAIJ - Zeros the (globally numbered) rows[] of A, optionally
  placing diag on the diagonal of each zeroed row and adjusting the right-hand
  side b so that the solution in those rows equals the corresponding entries
  of x (b[i] = diag * x[i]).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square layout: the sequential routine can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layout: zero first, then insert diagonal entries one by
       one (may create new nonzeros, hence the nonew check) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1662 
/*
  MatZeroRowsColumns_MPIBAIJ - Zeros both the rows and the columns rows[] of
  A (global numbering), optionally placing diag on the diagonal and updating
  b to account for the removed column entries (b -= A_col * x).

  The row list is communicated with a PetscSF so every rank learns which of
  its own rows are targeted; a scattered 0/1 mask vector marks which ghost
  columns of the off-diagonal block must be zeroed.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask over owned rows, then scatter it so each rank knows
     which of its ghost columns correspond to zeroed rows */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to correct b for removed columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* aa points at the first entry of point-row `row` inside its block row;
       entries of that point row are spaced bs apart in the bs2-sized blocks */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1758 
1759 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1762 
1763   PetscFunctionBegin;
1764   PetscCall(MatSetUnfactored(a->A));
1765   PetscFunctionReturn(PETSC_SUCCESS);
1766 }
1767 
1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1769 
1770 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1773   Mat          a, b, c, d;
1774   PetscBool    flg;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A;
1778   b = matA->B;
1779   c = matB->A;
1780   d = matB->B;
1781 
1782   PetscCall(MatEqual(a, c, &flg));
1783   if (flg) PetscCall(MatEqual(b, d, &flg));
1784   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1785   PetscFunctionReturn(PETSC_SUCCESS);
1786 }
1787 
1788 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1789 {
1790   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1791   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1792 
1793   PetscFunctionBegin;
1794   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1795   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1796     PetscCall(MatCopy_Basic(A, B, str));
1797   } else {
1798     PetscCall(MatCopy(a->A, b->A, str));
1799     PetscCall(MatCopy(a->B, b->B, str));
1800   }
1801   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1802   PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804 
/* MatSetUp_MPIBAIJ - sets up the matrix when the user has not called a preallocation routine */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  /* PETSC_DEFAULT counts with NULL per-row arrays request default preallocation */
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1811 
1812 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1813 {
1814   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1815   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1816   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1820   PetscFunctionReturn(PETSC_SUCCESS);
1821 }
1822 
/*
  MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

  SAME_NONZERO_PATTERN: the stored values arrays line up, so a single BLAS
  axpy per block suffices. SUBSET_NONZERO_PATTERN: defer to the generic
  implementation. Otherwise: build a new matrix preallocated for the union
  pattern, accumulate into it, and merge it back into Y's header.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts blocks; scale by bs2 for the number of scalar entries */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    /* preallocate for the union of X's and Y's nonzero patterns */
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's content with B while keeping Y's header/identity */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1865 
1866 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1867 {
1868   PetscFunctionBegin;
1869   if (PetscDefined(USE_COMPLEX)) {
1870     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1871 
1872     PetscCall(MatConjugate_SeqBAIJ(a->A));
1873     PetscCall(MatConjugate_SeqBAIJ(a->B));
1874   }
1875   PetscFunctionReturn(PETSC_SUCCESS);
1876 }
1877 
1878 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1879 {
1880   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1881 
1882   PetscFunctionBegin;
1883   PetscCall(MatRealPart(a->A));
1884   PetscCall(MatRealPart(a->B));
1885   PetscFunctionReturn(PETSC_SUCCESS);
1886 }
1887 
1888 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1889 {
1890   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1891 
1892   PetscFunctionBegin;
1893   PetscCall(MatImaginaryPart(a->A));
1894   PetscCall(MatImaginaryPart(a->B));
1895   PetscFunctionReturn(PETSC_SUCCESS);
1896 }
1897 
1898 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1899 {
1900   IS       iscol_local;
1901   PetscInt csize;
1902 
1903   PetscFunctionBegin;
1904   PetscCall(ISGetLocalSize(iscol, &csize));
1905   if (call == MAT_REUSE_MATRIX) {
1906     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1907     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1908   } else {
1909     PetscCall(ISAllGather(iscol, &iscol_local));
1910   }
1911   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1912   if (call == MAT_INITIAL_MATRIX) {
1913     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1914     PetscCall(ISDestroy(&iscol_local));
1915   }
1916   PetscFunctionReturn(PETSC_SUCCESS);
1917 }
1918 
1919 /*
1920   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1921   in local and then by concatenating the local matrices the end result.
1922   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1923   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1924 */
/*
  MatCreateSubMatrix_MPIBAIJ_Private - Worker for MatCreateSubMatrix: first
  each rank extracts its local sequential submatrix (Mreuse), then the local
  pieces are concatenated into the parallel result M. csize is the requested
  local column size of the result (PETSC_DECIDE to choose automatically).
  On MAT_INITIAL_MATRIX the intermediate Mreuse is cached on M for reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert the point-index sets into block-index sets */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on *newmat by a previous call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* work in block rows/columns from here on */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* distribute the n block columns as evenly as possible */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* this routine serves BAIJ and SBAIJ; only the call matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local sequential submatrix one block row at a time */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2050 
/*
  MatPermute_MPIBAIJ - Creates B = P_r * A * P_c for the row and column
  permutations rowp/colp, implemented as a submatrix extraction with permuted
  index sets. The row permutation must live on A's communicator (a copy is
  made if it does not); the column permutation must be available on every
  rank, so it is allgathered when it is distributed.
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    /* copy rowp's indices onto A's communicator */
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
  /* clean up (only the IS objects this routine created) */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2089 
2090 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2091 {
2092   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2093   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2094 
2095   PetscFunctionBegin;
2096   if (nghosts) *nghosts = B->nbs;
2097   if (ghosts) *ghosts = baij->garray;
2098   PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100 
/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every rank, a sequential AIJ
  matrix (block-row by block-column) holding the complete nonzero structure of
  the parallel matrix A. Row lengths and then column indices are gathered with
  MPI_Allgatherv; numerical values are never set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i (A part + B part) */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* single allocation: first half receive counts, second half displacements */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's j array at this rank's block-row offset */
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge ad (diagonal) and bd (off-diagonal) columns of each row in
     ascending global order: bd-below-diagonal, ad, bd-above-diagonal */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2190 
/*
  MatSOR_MPIBAIJ - Block SOR/Gauss-Seidel relaxation for MPIBAIJ. Only the
  "local" sweep variants are supported: each outer iteration scatters the
  current solution, forms bb1 = bb - B*x (contribution of off-process
  columns), and runs the corresponding sequential sweep on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* purely local operation; delegate to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is only needed when at least one non-zero-initial-guess sweep runs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no scatter since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2258 
2259 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2260 {
2261   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2262   PetscInt     m, N, i, *garray = aij->garray;
2263   PetscInt     ib, jb, bs = A->rmap->bs;
2264   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2265   MatScalar   *a_val = a_aij->a;
2266   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2267   MatScalar   *b_val = b_aij->a;
2268   PetscReal   *work;
2269 
2270   PetscFunctionBegin;
2271   PetscCall(MatGetSize(A, &m, &N));
2272   PetscCall(PetscCalloc1(N, &work));
2273   if (type == NORM_2) {
2274     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2278           a_val++;
2279         }
2280       }
2281     }
2282     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2286           b_val++;
2287         }
2288       }
2289     }
2290   } else if (type == NORM_1) {
2291     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2295           a_val++;
2296         }
2297       }
2298     }
2299     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2303           b_val++;
2304         }
2305       }
2306     }
2307   } else if (type == NORM_INFINITY) {
2308     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2313           a_val++;
2314         }
2315       }
2316     }
2317     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2318       for (jb = 0; jb < bs; jb++) {
2319         for (ib = 0; ib < bs; ib++) {
2320           int col   = garray[b_aij->j[i]] * bs + jb;
2321           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2322           b_val++;
2323         }
2324       }
2325     }
2326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2327     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2344     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2348           a_val++;
2349         }
2350       }
2351     }
2352     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2353       for (jb = 0; jb < bs; jb++) {
2354         for (ib = 0; ib < bs; ib++) {
2355           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2356           b_val++;
2357         }
2358       }
2359     }
2360   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2361   if (type == NORM_INFINITY) {
2362     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2363   } else {
2364     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2365   }
2366   PetscCall(PetscFree(work));
2367   if (type == NORM_2) {
2368     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2369   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2370     for (i = 0; i < N; i++) reductions[i] /= m;
2371   }
2372   PetscFunctionReturn(PETSC_SUCCESS);
2373 }
2374 
2375 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2376 {
2377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2378 
2379   PetscFunctionBegin;
2380   PetscCall(MatInvertBlockDiagonal(a->A, values));
2381   A->factorerrortype             = a->A->factorerrortype;
2382   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2383   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2384   PetscFunctionReturn(PETSC_SUCCESS);
2385 }
2386 
2387 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2388 {
2389   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2390   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2391 
2392   PetscFunctionBegin;
2393   if (!Y->preallocated) {
2394     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2395   } else if (!aij->nz) {
2396     PetscInt nonew = aij->nonew;
2397     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2398     aij->nonew = nonew;
2399   }
2400   PetscCall(MatShift_Basic(Y, a));
2401   PetscFunctionReturn(PETSC_SUCCESS);
2402 }
2403 
2404 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2405 {
2406   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2407 
2408   PetscFunctionBegin;
2409   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2410   PetscCall(MatMissingDiagonal(a->A, missing, d));
2411   if (d) {
2412     PetscInt rstart;
2413     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2414     *d += rstart / A->rmap->bs;
2415   }
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
2419 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2420 {
2421   PetscFunctionBegin;
2422   *a = ((Mat_MPIBAIJ *)A->data)->A;
2423   PetscFunctionReturn(PETSC_SUCCESS);
2424 }
2425 
/* -------------------------------------------------------------------*/
/* Dispatch table for MATMPIBAIJ. Entries are positional: the numeric
   comments mark the index of the following slot in struct _MatOps, and
   NULL marks an operation this matrix type does not implement.
   Do NOT reorder or remove entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
2579 
2580 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2581 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2582 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates and fills a MATMPIBAIJ matrix
  from a local block-CSR structure (ii, jj in block indices, V optional block values).

  Two passes over the CSR data: the first counts diagonal- vs off-diagonal-block
  columns per block row for exact preallocation; the second inserts the blocks
  with MatSetValuesBlocked_MPIBAIJ.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  /* Fix the block size on both layouts, set them up, then convert the
     point ranges to block ranges: everything below works in block indices */
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); /* re-query: the layout holds the final bs */
  m      = B->rmap->n / bs;                         /* local number of block rows */
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  /* Pass 1: count diagonal-block (dlen) and off-diagonal-block (olen) entries per block row */
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row, used to size the zero-value buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++; /* outside [cstart,cend) -> off-diagonal part */
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* If no numerical values were supplied, insert zeros so the nonzero structure is still created */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  /* Pass 2: insert the blocks */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart; /* global block row */
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* All insertions above are locally owned rows, so skip the off-process stash exchange during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* The CSR pattern is exact; any later insertion at a new location is an error */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2651 
2652 /*@C
2653    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2654 
2655    Collective
2656 
2657    Input Parameters:
2658 +  B - the matrix
2659 .  bs - the block size
2660 .  i - the indices into j for the start of each local row (starts with zero)
2661 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2662 -  v - optional values in the matrix
2663 
2664    Level: advanced
2665 
2666    Notes:
2667     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2668    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2669    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2670    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2671    block column and the second index is over columns within a block.
2672 
2673    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2674 
2675 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2676 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation if the type provides one; no-op otherwise */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2686 
2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2688 {
2689   Mat_MPIBAIJ *b;
2690   PetscInt     i;
2691   PetscMPIInt  size;
2692 
2693   PetscFunctionBegin;
2694   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2695   PetscCall(PetscLayoutSetUp(B->rmap));
2696   PetscCall(PetscLayoutSetUp(B->cmap));
2697   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2698 
2699   if (d_nnz) {
2700     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2701   }
2702   if (o_nnz) {
2703     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2704   }
2705 
2706   b      = (Mat_MPIBAIJ *)B->data;
2707   b->bs2 = bs * bs;
2708   b->mbs = B->rmap->n / bs;
2709   b->nbs = B->cmap->n / bs;
2710   b->Mbs = B->rmap->N / bs;
2711   b->Nbs = B->cmap->N / bs;
2712 
2713   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2714   b->rstartbs = B->rmap->rstart / bs;
2715   b->rendbs   = B->rmap->rend / bs;
2716   b->cstartbs = B->cmap->rstart / bs;
2717   b->cendbs   = B->cmap->rend / bs;
2718 
2719 #if defined(PETSC_USE_CTABLE)
2720   PetscCall(PetscHMapIDestroy(&b->colmap));
2721 #else
2722   PetscCall(PetscFree(b->colmap));
2723 #endif
2724   PetscCall(PetscFree(b->garray));
2725   PetscCall(VecDestroy(&b->lvec));
2726   PetscCall(VecScatterDestroy(&b->Mvctx));
2727 
2728   /* Because the B will have been resized we simply destroy it and create a new one each time */
2729   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2730   PetscCall(MatDestroy(&b->B));
2731   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2732   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2733   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2734 
2735   if (!B->preallocated) {
2736     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2737     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2738     PetscCall(MatSetType(b->A, MATSEQBAIJ));
2739     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2740   }
2741 
2742   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2743   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2744   B->preallocated  = PETSC_TRUE;
2745   B->was_assembled = PETSC_FALSE;
2746   B->assembled     = PETSC_FALSE;
2747   PetscFunctionReturn(PETSC_SUCCESS);
2748 }
2749 
2750 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2751 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2752 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency graph from the block
  nonzero structure of a MATMPIBAIJ matrix (one graph vertex per block row,
  diagonal entries dropped, columns in global block numbering).

  Ownership of the ii/jj arrays is transferred to the created MATMPIADJ matrix,
  so they are not freed here.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* Pass 1: row-pointer array; each row's count is diag + offdiag entries,
     minus one if the row contains its diagonal entry */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* Pass 2: merge each row's columns in ascending global order:
     off-diagonal columns left of the local range, then local (diagonal part)
     columns shifted by rstart, then the remaining off-diagonal columns */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break; /* rest of the off-diagonal columns come after the local range */
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal entry */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2790 
2791 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2792 
2793 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2794 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting
  its sequential diagonal and off-diagonal parts with MatConvert_SeqBAIJ_SeqAIJ.

  For the non-reuse path, A is temporarily disassembled (so the off-diagonal part
  carries global column indices) and reassembled afterwards; the symmetry-related
  flags are saved and restored around that, since they would otherwise be lost.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the AIJ shell; minimal preallocation since its A/B parts are replaced below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* save flags that the disassemble/assemble cycle below would reset */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: B takes over A's header; B's shell is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2843 
2844 /*MC
2845    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2846 
2847    Options Database Keys:
2848 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2849 . -mat_block_size <bs> - set the blocksize used to store the matrix
2850 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2851 - -mat_use_hash_table <fact> - set hash table factor
2852 
2853    Level: beginner
2854 
2855    Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2858 
.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2860 M*/
2861 
2862 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2863 
/*
  MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type: installs the
  function table, initializes the per-type data structure, composes the conversion
  and preallocation methods, and processes the hash-table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* built lazily on first assembly */
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose type-specific methods looked up by name via PetscTryMethod/PetscUseMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39; /* default hash-table oversize factor */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factor must exceed 1; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2943 
2944 /*MC
2945    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2946 
2947    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2948    and `MATMPIBAIJ` otherwise.
2949 
2950    Options Database Keys:
2951 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2952 
2953   Level: beginner
2954 
2955 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2956 M*/
2957 
2958 /*@C
2959    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2960    (block compressed row).
2961 
2962    Collective
2963 
2964    Input Parameters:
2965 +  B - the matrix
2966 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2967           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2968 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2969            submatrix  (same for all local rows)
2970 .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2972            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2973            set it even if it is zero.
2974 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2975            submatrix (same for all local rows).
2976 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2977            off-diagonal portion of the local submatrix (possibly different for
2978            each block row) or `NULL`.
2979 
2980    If the *_nnz parameter is given then the *_nz parameter is ignored
2981 
2982    Options Database Keys:
2983 +   -mat_block_size - size of the blocks to use
2984 -   -mat_use_hash_table <fact> - set hash table factor
2985 
2986    Level: intermediate
2987 
2988    Notes:
2989    For good matrix assembly performance
2990    the user should preallocate the matrix storage by setting the parameters
2991    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2992    performance can be increased by more than a factor of 50.
2993 
2994    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2996 
2997    Storage Information:
2998    For a square global matrix we define each processor's diagonal portion
2999    to be its local rows and the corresponding columns (a square submatrix);
3000    each processor's off-diagonal portion encompasses the remainder of the
3001    local matrix (a rectangular submatrix).
3002 
3003    The user can specify preallocated storage for the diagonal part of
3004    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3005    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3006    memory allocation.  Likewise, specify preallocated storage for the
3007    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3008 
3009    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3010    the figure below we depict these three local rows and all columns (0-11).
3011 
3012 .vb
3013            0 1 2 3 4 5 6 7 8 9 10 11
3014           --------------------------
3015    row 3  |o o o d d d o o o o  o  o
3016    row 4  |o o o d d d o o o o  o  o
3017    row 5  |o o o d d d o o o o  o  o
3018           --------------------------
3019 .ve
3020 
3021    Thus, any entries in the d locations are stored in the d (diagonal)
3022    submatrix, and any entries in the o locations are stored in the
3023    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3024    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3025 
3026    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3027    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3028    In general, for PDE problems in which most nonzeros are near the diagonal,
3029    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3030    or you will get TERRIBLE performance; see the users' manual chapter on
3031    matrices.
3032 
3033    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3034    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3035    You can also run with the option `-info` and look for messages with the string
3036    malloc in them to see if additional memory allocation was needed.
3037 
3038 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3039 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the matrix object and check bs is the same on all ranks before dispatching */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* forward to the type-specific implementation; silently does nothing if B has no
     "MatMPIBAIJSetPreallocation_C" method composed (i.e. is not an MPIBAIJ-like type) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3049 
3050 /*@C
3051    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3052    (block compressed row).
3053 
3054    Collective
3055 
3056    Input Parameters:
3057 +  comm - MPI communicator
3058 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3059           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3060 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3061            This value should be the same as the local size used in creating the
3062            y vector for the matrix-vector product y = Ax.
3063 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3064            This value should be the same as the local size used in creating the
3065            x vector for the matrix-vector product y = Ax.
3066 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3067 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3068 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3069            submatrix  (same for all local rows)
3070 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
3072            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3073            and set it even if it is zero.
3074 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3075            submatrix (same for all local rows).
3076 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3077            off-diagonal portion of the local submatrix (possibly different for
3078            each block row) or NULL.
3079 
3080    Output Parameter:
3081 .  A - the matrix
3082 
3083    Options Database Keys:
3084 +   -mat_block_size - size of the blocks to use
3085 -   -mat_use_hash_table <fact> - set hash table factor
3086 
3087    Level: intermediate
3088 
3089    Notes:
3090    For good matrix assembly performance
3091    the user should preallocate the matrix storage by setting the parameters
3092    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3093    performance can be increased by more than a factor of 50.
3094 
3095    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3096    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3097    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3098 
3099    If the *_nnz parameter is given then the *_nz parameter is ignored
3100 
   A nonzero block is any block that has 1 or more nonzeros in it
3102 
3103    The user MUST specify either the local or global matrix dimensions
3104    (possibly both).
3105 
3106    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3108 
3109    Storage Information:
3110    For a square global matrix we define each processor's diagonal portion
3111    to be its local rows and the corresponding columns (a square submatrix);
3112    each processor's off-diagonal portion encompasses the remainder of the
3113    local matrix (a rectangular submatrix).
3114 
3115    The user can specify preallocated storage for the diagonal part of
3116    the local submatrix with either d_nz or d_nnz (not both).  Set
3117    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3118    memory allocation.  Likewise, specify preallocated storage for the
3119    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3120 
3121    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3122    the figure below we depict these three local rows and all columns (0-11).
3123 
3124 .vb
3125            0 1 2 3 4 5 6 7 8 9 10 11
3126           --------------------------
3127    row 3  |o o o d d d o o o o  o  o
3128    row 4  |o o o d d d o o o o  o  o
3129    row 5  |o o o d d d o o o o  o  o
3130           --------------------------
3131 .ve
3132 
3133    Thus, any entries in the d locations are stored in the d (diagonal)
3134    submatrix, and any entries in the o locations are stored in the
3135    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3136    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3137 
3138    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3139    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3140    In general, for PDE problems in which most nonzeros are near the diagonal,
3141    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3142    or you will get TERRIBLE performance; see the users' manual chapter on
3143    matrices.
3144 
3145 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3146 @*/
3147 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3148 {
3149   PetscMPIInt size;
3150 
3151   PetscFunctionBegin;
3152   PetscCall(MatCreate(comm, A));
3153   PetscCall(MatSetSizes(*A, m, n, M, N));
3154   PetscCallMPI(MPI_Comm_size(comm, &size));
3155   if (size > 1) {
3156     PetscCall(MatSetType(*A, MATMPIBAIJ));
3157     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3158   } else {
3159     PetscCall(MatSetType(*A, MATSEQBAIJ));
3160     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3161   }
3162   PetscFunctionReturn(PETSC_SUCCESS);
3163 }
3164 
/* Duplicate an MPIBAIJ matrix: replicate the layout and bookkeeping fields of matin,
   copy or recreate the communication structures (colmap, garray, lvec, Mvctx), and
   duplicate the diagonal (A) and off-diagonal (B) blocks honoring cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL; /* only set on successful completion */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate is marked assembled/preallocated since its data is copied below */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state is per-matrix and is not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but reset the
     table itself and its statistics counters */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* deep-copy the global-to-local column map, whose representation depends on PETSC_USE_CTABLE */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* deep-copy garray (global block-column indices of B) when B has nonzero block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* fresh block stash; duplicate the mat-vec communication vector and scatter */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* duplicate the sequential diagonal and off-diagonal blocks (values copied per cpvalues) */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3240 
/* Load a matrix from a PETSc binary viewer: read the header, set up layouts,
   count block nonzeros to preallocate, then insert the values row by row.
   Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, nonzero count */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* a negative nz marks a special on-disk format, rejected below */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices inherit from the other map) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local number of block rows */
  nbs = n / bs; /* local number of block columns */

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths over all ranks must account for every stored nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count the distinct block columns touched by its bs scalar rows */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* SBAIJ stores only the upper triangle */
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* only the call matching mat's type takes effect; the other is a no-op (PetscTryMethod) */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one (scalar) local row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3342 
3343 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3344 {
3345   PetscBool isbinary;
3346 
3347   PetscFunctionBegin;
3348   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3349   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3350   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 /*@
3355    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3356 
3357    Input Parameters:
3358 +  mat  - the matrix
3359 -  fact - factor
3360 
3361    Options Database Key:
3362 .  -mat_use_hash_table <fact> - provide the factor
3363 
3364    Level: advanced
3365 
3366 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3367 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* forward to the type-specific implementation; silently does nothing for
     matrix types that do not compose "MatSetHashTableFactor_C" */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3374 
3375 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3376 {
3377   Mat_MPIBAIJ *baij;
3378 
3379   PetscFunctionBegin;
3380   baij          = (Mat_MPIBAIJ *)mat->data;
3381   baij->ht_fact = fact;
3382   PetscFunctionReturn(PETSC_SUCCESS);
3383 }
3384 
3385 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3386 {
3387   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3388   PetscBool    flg;
3389 
3390   PetscFunctionBegin;
3391   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3392   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3393   if (Ad) *Ad = a->A;
3394   if (Ao) *Ao = a->B;
3395   if (colmap) *colmap = a->garray;
3396   PetscFunctionReturn(PETSC_SUCCESS);
3397 }
3398 
3399 /*
3400     Special version for direct calls from Fortran (to eliminate two function call overheads
3401 */
3402 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3403   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3404 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3405   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3406 #endif
3407 
3408 /*@C
3409   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3410 
3411   Collective
3412 
3413   Input Parameters:
3414 + mat - the matrix
3415 . min - number of input rows
3416 . im - input rows
3417 . nin - number of input columns
3418 . in - input columns
3419 . v - numerical values input
3420 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3421 
3422   Developer Note:
3423     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3424 
3425   Level: advanced
3426 
3427 .seealso: `Mat`, `MatSetValuesBlocked()`
3428 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs2-sized scratch block, allocated lazily below */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* allocate the per-matrix scratch block on first use */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of a block within v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the bs-by-bs block from v into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion (A) */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently skipped */
        } else {
          /* block column lies in the off-diagonal portion (B) */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* map the global block column to B's compacted local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* a new off-process block column: disassemble so B can accept it */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another process: stash it for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3536 
3537 /*@
3538      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3539          CSR format the local rows.
3540 
3541    Collective
3542 
3543    Input Parameters:
3544 +  comm - MPI communicator
3545 .  bs - the block size, only a block size of 1 is supported
3546 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3547 .  n - This value should be the same as the local size used in creating the
3548        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3549        calculated if N is given) For square matrices n is almost always m.
3550 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3551 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
3553 .   j - column indices
3554 -   a - matrix values
3555 
3556    Output Parameter:
3557 .   mat - the matrix
3558 
3559    Level: intermediate
3560 
3561    Notes:
3562        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3563      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3564      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3565 
3566      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3567      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3568      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3569      with column-major ordering within blocks.
3570 
3571        The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3572 
3573 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3574           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3575 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* the input values are column-major within each block (see the Notes above), so
     insert with column orientation, then restore the default row orientation */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3591 
3592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3593 {
3594   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3595   PetscInt    *indx;
3596   PetscScalar *values;
3597 
3598   PetscFunctionBegin;
3599   PetscCall(MatGetSize(inmat, &m, &N));
3600   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3601     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3602     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3603     PetscInt    *bindx, rmax = a->rmax, j;
3604     PetscMPIInt  rank, size;
3605 
3606     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3607     mbs = m / bs;
3608     Nbs = N / cbs;
3609     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3610     nbs = n / cbs;
3611 
3612     PetscCall(PetscMalloc1(rmax, &bindx));
3613     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3614 
3615     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3616     PetscCallMPI(MPI_Comm_rank(comm, &size));
3617     if (rank == size - 1) {
3618       /* Check sum(nbs) = Nbs */
3619       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3620     }
3621 
3622     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3623     for (i = 0; i < mbs; i++) {
3624       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3625       nnz = nnz / bs;
3626       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3627       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3628       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3629     }
3630     PetscCall(PetscFree(bindx));
3631 
3632     PetscCall(MatCreate(comm, outmat));
3633     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3634     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3635     PetscCall(MatSetType(*outmat, MATBAIJ));
3636     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3637     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3638     MatPreallocateEnd(dnz, onz);
3639     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3640   }
3641 
3642   /* numeric phase */
3643   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3644   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3645 
3646   for (i = 0; i < m; i++) {
3647     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3648     Ii = i + rstart;
3649     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3650     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3651   }
3652   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3653   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3654   PetscFunctionReturn(PETSC_SUCCESS);
3655 }
3656