xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d8e4f26e7eadd05aa3d9f3d0f2220b3e914971d9)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
14   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
15   PetscScalar       *va, *vv;
16   Vec                vB, vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
21   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
22 
23   PetscCall(VecGetArrayWrite(vA, &va));
24   if (idx) {
25     for (i = 0; i < m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
31   PetscCall(PetscMalloc1(m, &idxb));
32   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
33 
34   PetscCall(VecGetArrayWrite(v, &vv));
35   PetscCall(VecGetArrayRead(vB, &vb));
36   for (i = 0; i < m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
43     }
44   }
45   PetscCall(VecRestoreArrayWrite(vA, &vv));
46   PetscCall(VecRestoreArrayWrite(vA, &va));
47   PetscCall(VecRestoreArrayRead(vB, &vb));
48   PetscCall(PetscFree(idxb));
49   PetscCall(VecDestroy(&vA));
50   PetscCall(VecDestroy(&vB));
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
57 
58   PetscFunctionBegin;
59   PetscCall(MatStoreValues(aij->A));
60   PetscCall(MatStoreValues(aij->B));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
65 {
66   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
67 
68   PetscFunctionBegin;
69   PetscCall(MatRetrieveValues(aij->A));
70   PetscCall(MatRetrieveValues(aij->B));
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75      Local utility routine that creates a mapping from the global column
76    number to the local number in the off-diagonal part of the local
77    storage of the matrix.  This is done in a non scalable way since the
78    length of colmap equals the global matrix length.
79 */
80 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
81 {
82   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
83   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
84   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
85 
86   PetscFunctionBegin;
87 #if defined(PETSC_USE_CTABLE)
88   PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
89   for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
90 #else
91   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
92   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
93 #endif
94   PetscFunctionReturn(0);
95 }
96 
/* Insert or add a single scalar `value` at local point location (row,col) of the
   diagonal block A.  Uses a short binary search followed by a linear scan over
   the block row; reallocates the row if a new bs x bs block must be inserted.
   (orow,ocol) are the global indices, used only in error messages.  Relies on
   variables declared in the caller's scope: brow, bcol, ridx, cidx, rp, ap, bap,
   rmax, nrow, low, high, t, _i, N, bs, bs2, a, A, aa, ai, aj, aimax, ailen. */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = aj + ai[brow]; /* block column indices of this block row */ \
    ap   = aa + bs2 * ai[brow]; /* values of this block row */ \
    rmax = aimax[brow]; /* allocated blocks in this row */ \
    nrow = ailen[brow]; /* used blocks in this row */ \
    bcol = col / bs; /* target block column */ \
    ridx = row % bs; /* offsets inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search, then finish linearly */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block exists; entries stored column-major inside the block */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); /* new block starts zeroed */ \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/* Identical to MatSetValues_SeqBAIJ_A_Private but targets the off-diagonal
   block B (arrays ba, bi, bj, bimax, bilen and flags from b).  `col` here is a
   local point index within B's compressed column space. */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; /* block row */ \
    rp   = bj + bi[brow]; /* block column indices of this block row */ \
    ap   = ba + bs2 * bi[brow]; /* values of this block row */ \
    rmax = bimax[brow]; /* allocated blocks in this row */ \
    nrow = bilen[brow]; /* used blocks in this row */ \
    bcol = col / bs; \
    ridx = row % bs; /* offsets inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search, then finish linearly */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block exists; entries stored column-major inside the block */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); /* new block starts zeroed */ \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/* Insert or add values v[] at global point indices (im[], in[]) of an MPIBAIJ
   matrix.  Locally owned rows are written directly into the diagonal (A) or
   off-diagonal (B) sequential blocks through the macros above; rows owned by
   other processes are placed in the stash for communication during assembly. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative columns are ignored by convention */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B uses a compressed column space; translate via the colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not yet present: disassemble so B reverts to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add point offset within the block column */
            }
          } else col = in[j]; /* before first assembly B uses global column numbering */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
262 
/* Insert or add one dense bs x bs block v[] at block location (row,col) of a
   SeqBAIJ matrix.  v is read row-major when a->roworiented, column-major
   otherwise; the stored block is always column-major.  (orow,ocol) are the
   caller's global block indices, used only in error messages. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) { /* binary search, then finish linearly */
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: merge the incoming values */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* transpose row-major input into the column-major stored block */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* input already column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
339 
340 /*
341     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
342     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343 */
/* Insert or add m x n dense bs x bs blocks v[] at global block indices
   (im[], in[]) of an MPIBAIJ matrix.  Each block is copied (unless the input
   layout already matches) into baij->barray and handed to
   MatSetValuesBlocked_SeqBAIJ_Inlined() for the diagonal (A) or off-diagonal
   (B) part; off-process block rows go to the block stash. */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2 staging buffer shared by all calls */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block in v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* block is already contiguous */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2; /* block is already contiguous */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block columns are ignored */
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate global block column to B's compressed numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* colmap stores point columns; convert to block */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not yet present: disassemble so B reverts to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j]; /* before first assembly B uses global numbering */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
435 
#define HASH_KEY             0.6180339887
/* Multiplicative (Knuth) hashing: maps `key` to [0,size) via the fractional
   part of key * (golden ratio); `tmp` is a caller-supplied PetscReal scratch. */
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* Hash-table variant of MatSetValues for MPIBAIJ: each (block row, block col)
   key is located via the HASH() function with linear probing (wrapping around
   at the table end), and the scalar is written straight into the stored block.
   The table must have been built beforehand; a missing key is an error.
   Off-process rows are stashed as in MatSetValues_MPIBAIJ. */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up the (block row, block col) pair in the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probing as below, but counting probes for diagnostics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) { /* wrap around to the table start */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probing: scan forward, then wrap to the beginning */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
508 
/* Hash-table variant of MatSetValuesBlocked for MPIBAIJ: looks up each
   (block row, block col) key with HASH() plus linear probing (wrapping at the
   table end) and writes the full bs x bs block into the stored location.
   Off-process block rows go to the block stash. */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units (rstartbs) but rend is the point-row
     end (rmap->rend); since rend >= rendbs this only widens the accepted range —
     presumably benign because im[] holds valid block rows, but confirm against
     baij->rendbs */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  /* distance (in scalars) between consecutive rows/columns of one block in v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of this block row's values (row-oriented walk) */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probing as below, but counting probes for diagnostics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) { /* wrap around to the table start */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probing: scan forward, then wrap to the beginning */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* address of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* transpose row-major input into the column-major stored block */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          /* input already column-major: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
607 
/* Retrieve entries v[i*n+j] = mat(idxm[i], idxn[j]) for locally owned rows only;
   requesting an off-process row is an error.  Columns outside the diagonal block
   are translated through the colmap; columns not stored in B yield 0.0. */
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* off-diagonal block: map global block column to B's numbering */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--; /* colmap stores (point column + 1); 0 means absent */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent or stale colmap entry: the column is not stored, value is zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs; /* add point offset inside the block column */
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
645 
/* Computes a matrix norm of an MPIBAIJ matrix.
   Supported types: NORM_FROBENIUS (sum of |a_ij|^2 over both blocks, reduced
   with MPI_SUM), NORM_1 (max column sum, computed via a global column-sum
   array), NORM_INFINITY (max row sum, reduced with MPI_MAX).  For a single
   process the computation is delegated to the sequential block. */
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over all stored entries of both blocks */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs (block ROW start), not
         cstartbs; presumably relies on the row and column layouts coinciding —
         confirm for rectangular layouts */
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      /* accumulate |a_ij| per global column; blocks are stored column-major */
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* garray maps to global block column */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* for each block row, accumulate the bs point-row sums across A and B */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
745 
746 /*
747   Creates the hash table, and sets the table
748   This table is created only once.
  If new entries need to be added to the matrix
750   then the hash table has to be destroyed and
751   recreated.
752 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;          /* diagonal and off-diagonal sequential blocks */
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* total probe count and longest probe chain, for diagnostics */
#endif

  PetscFunctionBegin;
  /* table is built only once; a rebuild requires destroying it first (see comment above) */
  if (baij->ht) PetscFunctionReturn(0);

  /* table capacity is a factor (>1) times the current number of nonzero blocks */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd; /* HD[slot] -> start of the bs2 scalar values of the block */
  HT = baij->ht; /* HT[slot] = key; calloc'd, so 0 marks an empty slot */

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;        /* global block row */
      col = aj[j] + cstart;    /* global block column (A stores local diagonal-block columns) */

      /* key encodes (global row, global col); +1 keeps every key nonzero so 0 means "empty" */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* open addressing with linear probing from the hashed slot */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++; /* collision: count the extra probe */
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* B columns are compressed; garray maps them to global block columns */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++; /* j = number of occupied slots */
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}
836 
837 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
838 {
839   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
840   PetscInt     nstash, reallocs;
841 
842   PetscFunctionBegin;
843   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
844 
845   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
846   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
847   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
848   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
849   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
850   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
851   PetscFunctionReturn(0);
852 }
853 
/*
  Completes assembly: drains the point and block stashes into the local pieces,
  assembles the sequential blocks A and B, handles collective disassembly, and
  optionally installs the hash-table MatSetValues implementations.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each received message is a batch of (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* block values: each entry carries bs2 scalars, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly: build the scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested hash-table MatSetValues: build the table now and swap the ops */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow work arrays are invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
962 
963 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
964 #include <petscdraw.h>
/*
  Views a parallel BAIJ matrix on ASCII, draw, or socket viewers. The INFO
  ASCII formats print per-rank summaries only; all other cases gather the
  entire matrix onto rank 0 into a temporary MPIBAIJ matrix and view that.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory summary plus the Mvctx scatter info */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals = the bs global scalar rows covered by block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one scalar column at a time (values stored column-major) */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray translates B's compressed column index to the global block column */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* view only the (sequential) diagonal block of the gathered copy, which is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1086 
1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* scalar nonzeros owned by this rank */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0, which writes the header */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  /* every scalar row within a block row has the same length */
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  /* emit each scalar row in globally ascending column order:
     B-entries left of the diagonal block, then A, then the remaining B-entries */
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break; /* cs/bs = first owned block column */
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  /* same traversal as the column indices; blocks are stored column-major,
     so scalar (row k, col l) of block j lives at a[bs*(bs*j + l) + k] */
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
1163 
1164 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1165 {
1166   PetscBool iascii, isdraw, issocket, isbinary;
1167 
1168   PetscFunctionBegin;
1169   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1173   if (iascii || isdraw || issocket) {
1174     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1175   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1176   PetscFunctionReturn(0);
1177 }
1178 
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  /* release communication stashes and the two sequential blocks */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* global-to-local column map: hash table or plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  /* scatter machinery used by the matrix-vector products */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht)); /* MatSetValues hash table, if it was built */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* detach composed methods so the object can be cleanly retyped */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}
1221 
1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1225   PetscInt     nt;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecGetLocalSize(xx, &nt));
1229   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1230   PetscCall(VecGetLocalSize(yy, &nt));
1231   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1232   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1233   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1234   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1235   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1240 {
1241   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1246   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1247   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1254 
1255   PetscFunctionBegin;
1256   /* do nondiagonal part */
1257   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1258   /* do local part */
1259   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1260   /* add partial results together */
1261   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1262   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1269 
1270   PetscFunctionBegin;
1271   /* do nondiagonal part */
1272   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1273   /* do local part */
1274   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1275   /* add partial results together */
1276   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1277   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*
1282   This only works correctly for square matrices where the subblock A->A is the
1283    diagonal block
1284 */
1285 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1286 {
1287   PetscFunctionBegin;
1288   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1289   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1294 {
1295   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1296 
1297   PetscFunctionBegin;
1298   PetscCall(MatScale(a->A, aa));
1299   PetscCall(MatScale(a->B, aa));
1300   PetscFunctionReturn(0);
1301 }
1302 
/*
  Returns one scalar row of the parallel matrix by merging the matching rows of
  the diagonal (A) and off-diagonal (B) blocks, with column indices translated
  to global numbering and kept in increasing order.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* tmp = number of nonzero blocks in block row i across A and B */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* only request from A/B what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* compressed B column -> global block column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = count of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: [B-left | A | B-right] */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the value pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        /* A columns are local to the diagonal block: offset by the global column start */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1387 {
1388   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1389 
1390   PetscFunctionBegin;
1391   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1392   baij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1397 {
1398   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1399 
1400   PetscFunctionBegin;
1401   PetscCall(MatZeroEntries(l->A));
1402   PetscCall(MatZeroEntries(l->B));
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1407 {
1408   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1409   Mat            A = a->A, B = a->B;
1410   PetscLogDouble isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size = (PetscReal)matin->rmap->bs;
1414 
1415   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1416 
1417   isend[0] = info->nz_used;
1418   isend[1] = info->nz_allocated;
1419   isend[2] = info->nz_unneeded;
1420   isend[3] = info->memory;
1421   isend[4] = info->mallocs;
1422 
1423   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1424 
1425   isend[0] += info->nz_used;
1426   isend[1] += info->nz_allocated;
1427   isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;
1429   isend[4] += info->mallocs;
1430 
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(0);
1458 }
1459 
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remembered locally (used by the stashing code) and forwarded to the blocks */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip the stash entirely: off-process MatSetValues are dropped */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash-table MatSetValues is installed at the next final assembly;
       1.39 is the default table-size factor relative to the nonzero count */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1507 
/*
  Forms the transpose by walking every local block of A and B and inserting its
  bs scalar columns as rows of the result (preallocation is unknown, so this
  relies on dynamic insertion).
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* result has the column layout of A as its row layout and vice versa */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals = the bs global scalar rows covered by block row i of A */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one scalar column of the block as row 'col' of B (transposed placement);
           blocks are stored column-major so 'a' advances by bs per scalar column */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray translates the compressed column index to the global block column */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* in-place transpose: move B's guts into A and discard the shell */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}
1574 
/*
  Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL.

  ll conforms to mat's local rows, rr to its local columns. The scatter of rr
  into baij->lvec is begun first so the communication overlaps with the left
  scaling and the diagonal-block scaling; the off-diagonal block is right-scaled
  only after the scatter completes.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block now; its right scaling must wait for the scatter */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}
1604 
/*
  Zeros the given global rows of A and places 'diag' on the diagonal of each
  zeroed row. If both x and b are provided, b is fixed so b = diag*x in the
  zeroed rows (keeps a linear system consistent with the modification).
  rows[] may reference rows owned by other ranks; they are mapped to local
  rows first.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the diagonal entry lives in l->A, set it directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: fall back to inserting diagonal entries one at a time */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1662 
/*
  Zeros the given global rows AND columns of A, placing 'diag' on the diagonal
  of the zeroed rows. If x and b are given, b is updated so the linear system
  remains consistent (b -= column * x contribution of the removed columns).

  Strategy: an SF maps the caller-supplied (possibly off-rank) rows to owned
  rows; the diagonal block is handled by MatZeroRowsColumns() on l->A; the
  off-diagonal block l->B is edited in place through its raw value array, using
  a scattered 0/1 mask vector to identify which ghost columns were zeroed.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  /* build a 0/1 mask of zeroed rows, then scatter it so every rank knows which
     of its ghost columns were zeroed */
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to fix the right-hand side below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* point at this point-row's first entry inside the bs x bs blocks; stride bs
       walks along the row within column-ordered block storage */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the removed column's contribution to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1758 
1759 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1762 
1763   PetscFunctionBegin;
1764   PetscCall(MatSetUnfactored(a->A));
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1769 
1770 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1773   Mat          a, b, c, d;
1774   PetscBool    flg;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A;
1778   b = matA->B;
1779   c = matB->A;
1780   d = matB->B;
1781 
1782   PetscCall(MatEqual(a, c, &flg));
1783   if (flg) PetscCall(MatEqual(b, d, &flg));
1784   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1789 {
1790   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1791   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1792 
1793   PetscFunctionBegin;
1794   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1795   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1796     PetscCall(MatCopy_Basic(A, B, str));
1797   } else {
1798     PetscCall(MatCopy(a->A, b->A, str));
1799     PetscCall(MatCopy(a->B, b->B, str));
1800   }
1801   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1802   PetscFunctionReturn(0);
1803 }
1804 
/* Implements MatSetUp(): preallocates with default (PETSC_DEFAULT) per-row
   estimates using the matrix's row block size. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
1811 
1812 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1813 {
1814   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1815   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1816   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1820   PetscFunctionReturn(0);
1821 }
1822 
/*
  Computes Y = a*X + Y.

  SAME_NONZERO_PATTERN   : a direct BLAS axpy over the stored values of both
                           the diagonal and off-diagonal blocks (fast path)
  SUBSET_NONZERO_PATTERN : generic MatAXPY_Basic()
  otherwise              : a fresh matrix with the merged pattern is
                           preallocated, filled, and header-merged back into Y
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts blocks, so the scalar length is nz*bs2 */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    /* build B with Y's layout/name/type and the merged X+Y pattern */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1865 
1866 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1867 {
1868   PetscFunctionBegin;
1869   if (PetscDefined(USE_COMPLEX)) {
1870     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1871 
1872     PetscCall(MatConjugate_SeqBAIJ(a->A));
1873     PetscCall(MatConjugate_SeqBAIJ(a->B));
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1879 {
1880   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1881 
1882   PetscFunctionBegin;
1883   PetscCall(MatRealPart(a->A));
1884   PetscCall(MatRealPart(a->B));
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1889 {
1890   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1891 
1892   PetscFunctionBegin;
1893   PetscCall(MatImaginaryPart(a->A));
1894   PetscCall(MatImaginaryPart(a->B));
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1899 {
1900   IS       iscol_local;
1901   PetscInt csize;
1902 
1903   PetscFunctionBegin;
1904   PetscCall(ISGetLocalSize(iscol, &csize));
1905   if (call == MAT_REUSE_MATRIX) {
1906     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1907     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1908   } else {
1909     PetscCall(ISAllGather(iscol, &iscol_local));
1910   }
1911   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1912   if (call == MAT_INITIAL_MATRIX) {
1913     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1914     PetscCall(ISDestroy(&iscol_local));
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 /*
1920   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1921   in local and then by concatenating the local matrices the end result.
1922   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1923   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1924 */
/*
  Workhorse for MatCreateSubMatrix on BAIJ/SBAIJ: first pulls the needed rows
  into a sequential matrix (Mreuse) via MatCreateSubMatrices_MPIBAIJ_local(),
  then concatenates those pieces into the parallel result M. iscol must be the
  same (gathered) column IS on every rank; csize is the requested local column
  size or PETSC_DECIDE.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential scratch matrix was stashed on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum the local column counts to locate this rank's diagonal column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* both preallocation calls are issued because this routine serves BAIJ and
       SBAIJ; the one that does not match M's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the rows of the sequential scratch matrix block-row by block-row */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2050 
2051 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2052 {
2053   MPI_Comm        comm, pcomm;
2054   PetscInt        clocal_size, nrows;
2055   const PetscInt *rows;
2056   PetscMPIInt     size;
2057   IS              crowp, lcolp;
2058 
2059   PetscFunctionBegin;
2060   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2061   /* make a collective version of 'rowp' */
2062   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2063   if (pcomm == comm) {
2064     crowp = rowp;
2065   } else {
2066     PetscCall(ISGetSize(rowp, &nrows));
2067     PetscCall(ISGetIndices(rowp, &rows));
2068     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2069     PetscCall(ISRestoreIndices(rowp, &rows));
2070   }
2071   PetscCall(ISSetPermutation(crowp));
2072   /* make a local version of 'colp' */
2073   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2074   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2075   if (size == 1) {
2076     lcolp = colp;
2077   } else {
2078     PetscCall(ISAllGather(colp, &lcolp));
2079   }
2080   PetscCall(ISSetPermutation(lcolp));
2081   /* now we just get the submatrix */
2082   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2083   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2084   /* clean up */
2085   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2086   if (size > 1) PetscCall(ISDestroy(&lcolp));
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2091 {
2092   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2093   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2094 
2095   PetscFunctionBegin;
2096   if (nghosts) *nghosts = B->nbs;
2097   if (ghosts) *ghosts = baij->garray;
2098   PetscFunctionReturn(0);
2099 }
2100 
/*
  Builds, on every rank, a sequential AIJ matrix holding the full (global)
  block nonzero structure of A, one row per block row; numerical values are
  not set. Each rank contributes its own rows' column indices in sorted global
  order (lower off-diagonal, diagonal, upper off-diagonal) and the pieces are
  exchanged with MPI_Allgatherv().
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  /* local block-row lengths = diagonal part + off-diagonal part */
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation holds both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's j array at this rank's row offset */
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}
2190 
/*
  Local (block Jacobi style) SOR for MPIBAIJ: each outer iteration scatters the
  current solution into the ghost vector, forms bb1 = bb - B*x (the rhs with
  off-process coupling removed), then runs 'lits' sweeps of the sequential SOR
  on the diagonal block. Only SOR_LOCAL_* variants (and SOR_APPLY_UPPER) are
  supported; true parallel SOR is not.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever an iteration beyond the zero-initial-guess one runs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost update: x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2258 
2259 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2260 {
2261   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2262   PetscInt     m, N, i, *garray = aij->garray;
2263   PetscInt     ib, jb, bs = A->rmap->bs;
2264   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2265   MatScalar   *a_val = a_aij->a;
2266   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2267   MatScalar   *b_val = b_aij->a;
2268   PetscReal   *work;
2269 
2270   PetscFunctionBegin;
2271   PetscCall(MatGetSize(A, &m, &N));
2272   PetscCall(PetscCalloc1(N, &work));
2273   if (type == NORM_2) {
2274     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2278           a_val++;
2279         }
2280       }
2281     }
2282     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2286           b_val++;
2287         }
2288       }
2289     }
2290   } else if (type == NORM_1) {
2291     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2295           a_val++;
2296         }
2297       }
2298     }
2299     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2303           b_val++;
2304         }
2305       }
2306     }
2307   } else if (type == NORM_INFINITY) {
2308     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2313           a_val++;
2314         }
2315       }
2316     }
2317     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2318       for (jb = 0; jb < bs; jb++) {
2319         for (ib = 0; ib < bs; ib++) {
2320           int col   = garray[b_aij->j[i]] * bs + jb;
2321           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2322           b_val++;
2323         }
2324       }
2325     }
2326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2327     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2344     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2348           a_val++;
2349         }
2350       }
2351     }
2352     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2353       for (jb = 0; jb < bs; jb++) {
2354         for (ib = 0; ib < bs; ib++) {
2355           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2356           b_val++;
2357         }
2358       }
2359     }
2360   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2361   if (type == NORM_INFINITY) {
2362     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2363   } else {
2364     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2365   }
2366   PetscCall(PetscFree(work));
2367   if (type == NORM_2) {
2368     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2369   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2370     for (i = 0; i < N; i++) reductions[i] /= m;
2371   }
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2376 {
2377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2378 
2379   PetscFunctionBegin;
2380   PetscCall(MatInvertBlockDiagonal(a->A, values));
2381   A->factorerrortype             = a->A->factorerrortype;
2382   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2383   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2388 {
2389   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2390   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2391 
2392   PetscFunctionBegin;
2393   if (!Y->preallocated) {
2394     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2395   } else if (!aij->nz) {
2396     PetscInt nonew = aij->nonew;
2397     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2398     aij->nonew = nonew;
2399   }
2400   PetscCall(MatShift_Basic(Y, a));
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2405 {
2406   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2407 
2408   PetscFunctionBegin;
2409   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2410   PetscCall(MatMissingDiagonal(a->A, missing, d));
2411   if (d) {
2412     PetscInt rstart;
2413     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2414     *d += rstart / A->rmap->bs;
2415   }
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2420 {
2421   PetscFunctionBegin;
2422   *a = ((Mat_MPIBAIJ *)A->data)->A;
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIBAIJ.  Entries are positional: each slot corresponds to a
   field of struct _MatOps, and the numeric comments give the slot index of the entry
   that follows them.  NULL marks an operation this matrix type does not implement.
   Do not reorder entries: slot order is the struct's field order. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - implementation behind MatMPIBAIJSetPreallocationCSR().

  Two passes over the local block-CSR description (ii, jj, V):
    1. count, for every local block row, how many block columns fall in the diagonal
       part [cstart, cend) versus the off-diagonal part, and preallocate accordingly;
    2. insert the values (or zeros, when V is NULL) with MatSetValuesBlocked_MPIBAIJ().
  All indices below (m, rstart, cstart, cend, ii, jj) are in units of blocks, not scalars.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may adjust bs; re-read it so the block conversions below are consistent */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* pass 1: split each block row's count into diagonal (dlen) and off-diagonal (olen) parts */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* pass 2: insert values; when V is NULL use a zeroed scratch block row so the
     nonzero pattern is still established */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* every row inserted above is locally owned, so skip off-process stash communication
     during this assembly; restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR description is exact, so lock the nonzero pattern */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2650 
2651 /*@C
2652    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654    Collective
2655 
2656    Input Parameters:
2657 +  B - the matrix
2658 .  bs - the block size
2659 .  i - the indices into j for the start of each local row (starts with zero)
2660 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2661 -  v - optional values in the matrix
2662 
2663    Level: advanced
2664 
2665    Notes:
2666     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2667    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2668    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2669    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2670    block column and the second index is over columns within a block.
2671 
2672    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2673 
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2675 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation if the type provides one; a no-op otherwise */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2685 
/*
  MatMPIBAIJSetPreallocation_MPIBAIJ - implementation behind MatMPIBAIJSetPreallocation().

  Validates the per-block-row counts, records the block-layout bookkeeping (counts and
  ownership ranges in units of blocks), tears down any previous communication structures,
  and (re)creates/preallocates the sequential diagonal part b->A and off-diagonal part b->B.
  May be called more than once; the off-diagonal matrix is always rebuilt from scratch.
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may adjust bs (e.g. from options); re-read the final value */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* record all sizes and ownership ranges in units of blocks */
  b      = (Mat_MPIBAIJ *)B->data;
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* discard stale column map and scatter context from any previous preallocation */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one rank there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQBAIJ));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2748 
2749 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2750 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2751 
/*
  MatConvert_MPIBAIJ_MPIAdj - converts the block structure of an MPIBAIJ matrix to a
  MATMPIADJ adjacency graph (one vertex per block row, in block-column numbering).

  The adjacency format stores no diagonal entries, so a block row's diagonal entry
  (if present in the diagonal part) is excluded from the counts and from the column
  list.  The column list of each row merges the off-diagonal columns that precede
  this rank's range, then the local (diagonal-part) columns, then the remaining
  off-diagonal columns, keeping the result sorted in global block-column order.
  Ownership of ii and jj transfers to the created MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* subtract one from the count when this row stores its diagonal entry, since MPIAdj omits the diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that precede this rank's block-column range */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local (diagonal-part) columns, skipping the diagonal entry itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns, continuing from where the first loop stopped */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}
2789 
2790 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2791 
2792 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2793 
/*
  MatConvert_MPIBAIJ_MPIAIJ - converts an assembled MPIBAIJ matrix to MATMPIAIJ by
  converting its sequential diagonal (a->A) and off-diagonal (a->B) parts.

  For the non-reuse path the source is first disassembled (MatDisAssemble_MPIBAIJ)
  so its off-diagonal part carries global column indices, converted, and then
  reassembled; since reassembly resets the symmetry/structure flags on A, those
  flags are saved beforehand and restored afterwards.  For MAT_INPLACE_MATRIX the
  new matrix replaces A via MatHeaderReplace().
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* build an empty MPIAIJ matrix with the same layout; its parts are filled below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* reassembly below resets these flags on A; save them so the source is unchanged */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* disassemble so a->B uses global column numbering before conversion */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
2842 
2843 /*MC
2844    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2845 
2846    Options Database Keys:
2847 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2848 . -mat_block_size <bs> - set the blocksize used to store the matrix
2849 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2850 - -mat_use_hash_table <fact> - set hash table factor
2851 
2852    Level: beginner
2853 
2854    Note:
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2857 
.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2859 M*/
2860 
2861 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2862 
/*
  MatCreate_MPIBAIJ - constructor registered for the MATMPIBAIJ type.

  Allocates the implementation struct, installs the operation table, initializes all
  internal state to empty, registers the composed (type-specific) functions, and
  processes the -mat_use_hash_table option.  Actual storage is created later by the
  preallocation routines.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* local-to-global column map; created on demand */
  b->garray      = NULL; /* global column indices of the off-diagonal part; set at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register the type-specific implementations looked up by PetscTryMethod()/PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39; /* default hash table over-allocation factor */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2942 
2943 /*MC
2944    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2945 
2946    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2947    and `MATMPIBAIJ` otherwise.
2948 
2949    Options Database Keys:
2950 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2951 
2952   Level: beginner
2953 
2954 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2955 M*/
2956 
2957 /*@C
2958    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2959    (block compressed row).
2960 
2961    Collective
2962 
2963    Input Parameters:
2964 +  B - the matrix
2965 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2966           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2967 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2968            submatrix  (same for all local rows)
2969 .  d_nnz - array containing the number of block nonzeros in the various block rows
2970            of the diagonal portion of the local submatrix (possibly different for each block
2971            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2972            set it even if it is zero.
2973 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2974            submatrix (same for all local rows).
2975 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2976            off-diagonal portion of the local submatrix (possibly different for
2977            each block row) or `NULL`.
2978 
2979    If the *_nnz parameter is given then the *_nz parameter is ignored
2980 
2981    Options Database Keys:
2982 +   -mat_block_size - size of the blocks to use
2983 -   -mat_use_hash_table <fact> - set hash table factor
2984 
2985    Level: intermediate
2986 
2987    Notes:
2988    For good matrix assembly performance
2989    the user should preallocate the matrix storage by setting the parameters
2990    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2991    performance can be increased by more than a factor of 50.
2992 
2993    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2994    then it must be used on all processors that share the object for that argument.
2995 
2996    Storage Information:
2997    For a square global matrix we define each processor's diagonal portion
2998    to be its local rows and the corresponding columns (a square submatrix);
2999    each processor's off-diagonal portion encompasses the remainder of the
3000    local matrix (a rectangular submatrix).
3001 
3002    The user can specify preallocated storage for the diagonal part of
3003    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3004    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3005    memory allocation.  Likewise, specify preallocated storage for the
3006    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3007 
3008    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3009    the figure below we depict these three local rows and all columns (0-11).
3010 
3011 .vb
3012            0 1 2 3 4 5 6 7 8 9 10 11
3013           --------------------------
3014    row 3  |o o o d d d o o o o  o  o
3015    row 4  |o o o d d d o o o o  o  o
3016    row 5  |o o o d d d o o o o  o  o
3017           --------------------------
3018 .ve
3019 
3020    Thus, any entries in the d locations are stored in the d (diagonal)
3021    submatrix, and any entries in the o locations are stored in the
3022    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3023    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3024 
3025    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3026    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3027    In general, for PDE problems in which most nonzeros are near the diagonal,
3028    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3029    or you will get TERRIBLE performance; see the users' manual chapter on
3030    matrices.
3031 
3032    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3033    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3034    You can also run with the option `-info` and look for messages with the string
3035    malloc in them to see if additional memory allocation was needed.
3036 
3037 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3038 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate before dispatching: B must be a live Mat whose type is already set,
     and bs must be the same value on every rank that shares B */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch through the composed method so this is a harmless no-op for
     matrix types that do not register "MatMPIBAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
3048 
3049 /*@C
3050    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3051    (block compressed row).
3052 
3053    Collective
3054 
3055    Input Parameters:
3056 +  comm - MPI communicator
3057 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3058           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3059 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3060            This value should be the same as the local size used in creating the
3061            y vector for the matrix-vector product y = Ax.
3062 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3063            This value should be the same as the local size used in creating the
3064            x vector for the matrix-vector product y = Ax.
3065 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3066 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3067 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3068            submatrix  (same for all local rows)
3069 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3070            of the diagonal portion of the local submatrix (possibly different for each block
3071            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3072            and set it even if it is zero.
3073 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3074            submatrix (same for all local rows).
3075 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3076            off-diagonal portion of the local submatrix (possibly different for
3077            each block row) or NULL.
3078 
3079    Output Parameter:
3080 .  A - the matrix
3081 
3082    Options Database Keys:
3083 +   -mat_block_size - size of the blocks to use
3084 -   -mat_use_hash_table <fact> - set hash table factor
3085 
3086    Level: intermediate
3087 
3088    Notes:
3089    For good matrix assembly performance
3090    the user should preallocate the matrix storage by setting the parameters
3091    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3092    performance can be increased by more than a factor of 50.
3093 
3094    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3095    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3096    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3097 
3098    If the *_nnz parameter is given then the *_nz parameter is ignored
3099 
3100    A nonzero block is any block that has 1 or more nonzeros in it
3101 
3102    The user MUST specify either the local or global matrix dimensions
3103    (possibly both).
3104 
3105    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3106    then it must be used on all processors that share the object for that argument.
3107 
3108    Storage Information:
3109    For a square global matrix we define each processor's diagonal portion
3110    to be its local rows and the corresponding columns (a square submatrix);
3111    each processor's off-diagonal portion encompasses the remainder of the
3112    local matrix (a rectangular submatrix).
3113 
3114    The user can specify preallocated storage for the diagonal part of
3115    the local submatrix with either d_nz or d_nnz (not both).  Set
3116    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3117    memory allocation.  Likewise, specify preallocated storage for the
3118    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3119 
3120    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3121    the figure below we depict these three local rows and all columns (0-11).
3122 
3123 .vb
3124            0 1 2 3 4 5 6 7 8 9 10 11
3125           --------------------------
3126    row 3  |o o o d d d o o o o  o  o
3127    row 4  |o o o d d d o o o o  o  o
3128    row 5  |o o o d d d o o o o  o  o
3129           --------------------------
3130 .ve
3131 
3132    Thus, any entries in the d locations are stored in the d (diagonal)
3133    submatrix, and any entries in the o locations are stored in the
3134    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3135    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3136 
3137    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3138    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3139    In general, for PDE problems in which most nonzeros are near the diagonal,
3140    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3141    or you will get TERRIBLE performance; see the users' manual chapter on
3142    matrices.
3143 
3144 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3145 @*/
3146 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3147 {
3148   PetscMPIInt size;
3149 
3150   PetscFunctionBegin;
3151   PetscCall(MatCreate(comm, A));
3152   PetscCall(MatSetSizes(*A, m, n, M, N));
3153   PetscCallMPI(MPI_Comm_size(comm, &size));
3154   if (size > 1) {
3155     PetscCall(MatSetType(*A, MATMPIBAIJ));
3156     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3157   } else {
3158     PetscCall(MatSetType(*A, MATSEQBAIJ));
3159     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3160   }
3161   PetscFunctionReturn(0);
3162 }
3163 
/* Duplicate an MPIBAIJ matrix: create a matrix of the same type and layout, copy all
   parallel bookkeeping held in Mat_MPIBAIJ, and duplicate the diagonal (A) and
   off-diagonal (B) sequential blocks; cpvalues controls whether numerical values
   are copied along with the nonzero pattern. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate starts out fully assembled with no pending stashed values */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping (block dimensions and bs^2) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is rebuilt lazily on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag and factor) but reset the
     table itself and its usage counters; the table is rebuilt when needed */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* deep-copy the global-to-local column map of the off-diagonal part, if present;
     its representation depends on the PETSC_USE_CTABLE build option */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray maps local block columns of B back to global block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* fresh block stash; the communication vector/scatter are duplicated/copied */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3239 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Load a matrix from a PETSc binary viewer: read the header and AIJ-format data,
   derive block preallocation counts for the diagonal/off-diagonal parts, then
   insert the values row by row and assemble. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices copy the partner's size) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local block rows */
  nbs = n / bs; /* local block columns */

  /* read in row lengths and build row indices (prefix sum -> local CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths across all ranks must account for every nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count the distinct block columns hit by its bs scalar rows */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ stores only the upper triangle, so skip entries below the diagonal */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls are method-dispatched; the one not matching mat's type is
       expected to be a no-op — TODO(review) confirm SBAIJ variant also uses PetscTryMethod */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one local scalar row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3341 
3342 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3343 {
3344   PetscBool isbinary;
3345 
3346   PetscFunctionBegin;
3347   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3348   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3349   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 /*@
3354    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3355 
3356    Input Parameters:
3357 +  mat  - the matrix
3358 -  fact - factor
3359 
3360    Options Database Key:
3361 .  -mat_use_hash_table <fact> - provide the factor
3362 
3363    Level: advanced
3364 
3365 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3366 @*/
3367 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3368 {
3369   PetscFunctionBegin;
3370   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3375 {
3376   Mat_MPIBAIJ *baij;
3377 
3378   PetscFunctionBegin;
3379   baij          = (Mat_MPIBAIJ *)mat->data;
3380   baij->ht_fact = fact;
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3385 {
3386   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3387   PetscBool    flg;
3388 
3389   PetscFunctionBegin;
3390   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3391   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3392   if (Ad) *Ad = a->A;
3393   if (Ao) *Ao = a->B;
3394   if (colmap) *colmap = a->garray;
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 /*
3399     Special version for direct calls from Fortran (to eliminate two function call overheads
3400 */
3401 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3402   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3403 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3404   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3405 #endif
3406 
3407 /*@C
3408   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3409 
3410   Collective
3411 
3412   Input Parameters:
3413 + mat - the matrix
3414 . min - number of input rows
3415 . im - input rows
3416 . nin - number of input columns
3417 . in - input columns
3418 . v - numerical values input
3419 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3420 
3421   Developer Note:
3422     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3423 
3424   Level: advanced
3425 
3426 .seealso: `Mat`, `MatSetValuesBlocked()`
3427 @*/
/* Fortran-callable copy of MatSetValuesBlocked_MPIBAIJ(); all scalar arguments
   arrive as pointers per Fortran convention and are unpacked first. Locally
   owned blocks go directly into the A (diagonal) or B (off-diagonal) sequential
   matrices; off-process rows are stashed for communication at assembly. */
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs x bs staging buffer for one block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the block staging buffer and cache it on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of a block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index means "skip this row" */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the bs x bs block from v into contiguous barray storage */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column owned locally: goes into the diagonal part A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column index means "skip this entry" */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* off-diagonal part: translate the global block column to B's local
               column through colmap (built on first use) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* unmapped column while new nonzeros are allowed: fall back to a
               disassembled state and use the global index directly */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* row owned by another process: stash it for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}
3535 
3536 /*@
3537      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3538          CSR format the local rows.
3539 
3540    Collective
3541 
3542    Input Parameters:
3543 +  comm - MPI communicator
3544 .  bs - the block size, only a block size of 1 is supported
3545 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3546 .  n - This value should be the same as the local size used in creating the
3547        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3548        calculated if N is given) For square matrices n is almost always m.
3549 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3550 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3551 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row-th block row of the matrix
3552 .   j - column indices
3553 -   a - matrix values
3554 
3555    Output Parameter:
3556 .   mat - the matrix
3557 
3558    Level: intermediate
3559 
3560    Notes:
3561        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3562      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3563      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3564 
3565      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3566      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3567      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3568      with column-major ordering within blocks.
3569 
3570        The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3571 
3572 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3573           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3574 @*/
3575 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3576 {
3577   PetscFunctionBegin;
3578   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3579   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3580   PetscCall(MatCreate(comm, mat));
3581   PetscCall(MatSetSizes(*mat, m, n, M, N));
3582   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3583   PetscCall(MatSetBlockSize(*mat, bs));
3584   PetscCall(MatSetUp(*mat));
3585   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3586   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3587   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3588   PetscFunctionReturn(0);
3589 }
3590 
3591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3592 {
3593   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3594   PetscInt    *indx;
3595   PetscScalar *values;
3596 
3597   PetscFunctionBegin;
3598   PetscCall(MatGetSize(inmat, &m, &N));
3599   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3600     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3601     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3602     PetscInt    *bindx, rmax = a->rmax, j;
3603     PetscMPIInt  rank, size;
3604 
3605     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3606     mbs = m / bs;
3607     Nbs = N / cbs;
3608     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3609     nbs = n / cbs;
3610 
3611     PetscCall(PetscMalloc1(rmax, &bindx));
3612     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3613 
3614     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3615     PetscCallMPI(MPI_Comm_rank(comm, &size));
3616     if (rank == size - 1) {
3617       /* Check sum(nbs) = Nbs */
3618       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3619     }
3620 
3621     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3622     for (i = 0; i < mbs; i++) {
3623       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3624       nnz = nnz / bs;
3625       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3626       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3627       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3628     }
3629     PetscCall(PetscFree(bindx));
3630 
3631     PetscCall(MatCreate(comm, outmat));
3632     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3633     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3634     PetscCall(MatSetType(*outmat, MATBAIJ));
3635     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3636     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3637     MatPreallocateEnd(dnz, onz);
3638     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3639   }
3640 
3641   /* numeric phase */
3642   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3643   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3644 
3645   for (i = 0; i < m; i++) {
3646     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3647     Ii = i + rstart;
3648     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3649     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3650   }
3651   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3652   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3653   PetscFunctionReturn(0);
3654 }
3655