xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
/*
   MatDestroy_MPIBAIJ - Releases all storage owned by an MPIBAIJ matrix.

   Frees the two sequential pieces (diagonal block A, off-diagonal block B),
   the stashes used to hold off-process insertions, the global-to-local column
   map, the local work vector and scatter, assorted work arrays, and finally
   detaches every composed method so the base Mat object can be re-typed.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  /* stash holds scalar-indexed off-process values, bstash block-indexed ones */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash map or a dense array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); /* MatGetRow() work space */
  PetscCall(PetscFree(baij->barray));                       /* MatSetValuesBlocked() copy buffer */
  PetscCall(PetscFree2(baij->hd, baij->ht));                /* hash-table insertion arrays */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* reset the type name and remove all composed functions so a subsequent
     MatSetType() starts from a clean slate */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
49 
50 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
51 #define TYPE BAIJ
52 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
53 #undef TYPE
54 
55 #if defined(PETSC_HAVE_HYPRE)
56 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
57 #endif
58 
59 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
60 {
61   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
62   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
63   PetscScalar       *va, *vv;
64   Vec                vB, vA;
65   const PetscScalar *vb;
66 
67   PetscFunctionBegin;
68   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
69   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
70 
71   PetscCall(VecGetArrayWrite(vA, &va));
72   if (idx) {
73     for (i = 0; i < m; i++) {
74       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
75     }
76   }
77 
78   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
79   PetscCall(PetscMalloc1(m, &idxb));
80   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
81 
82   PetscCall(VecGetArrayWrite(v, &vv));
83   PetscCall(VecGetArrayRead(vB, &vb));
84   for (i = 0; i < m; i++) {
85     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
86       vv[i] = vb[i];
87       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
88     } else {
89       vv[i] = va[i];
90       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
91     }
92   }
93   PetscCall(VecRestoreArrayWrite(vA, &vv));
94   PetscCall(VecRestoreArrayWrite(vA, &va));
95   PetscCall(VecRestoreArrayRead(vB, &vb));
96   PetscCall(PetscFree(idxb));
97   PetscCall(VecDestroy(&vA));
98   PetscCall(VecDestroy(&vB));
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
103 {
104   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
105 
106   PetscFunctionBegin;
107   PetscCall(MatStoreValues(aij->A));
108   PetscCall(MatStoreValues(aij->B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
113 {
114   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatRetrieveValues(aij->A));
118   PetscCall(MatRetrieveValues(aij->B));
119   PetscFunctionReturn(PETSC_SUCCESS);
120 }
121 
122 /*
123      Local utility routine that creates a mapping from the global column
124    number to the local number in the off-diagonal part of the local
125    storage of the matrix.  This is done in a non scalable way since the
126    length of colmap equals the global matrix length.
127 */
128 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
129 {
130   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
131   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
132   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
133 
134   PetscFunctionBegin;
135 #if defined(PETSC_USE_CTABLE)
136   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
137   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
138 #else
139   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
140   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
141 #endif
142   PetscFunctionReturn(PETSC_SUCCESS);
143 }
144 
/*
   MatSetValues_SeqBAIJ_A_Private - Inserts or adds a single scalar `value` at
   local point position (row, col) of the diagonal block A.

   Operates on the variables declared in MatSetValues_MPIBAIJ() (rp, ap, nrow,
   brow, bcol, ridx, cidx, low, high, ...); (orow, ocol) are the original
   global indices, used only in error messages.  A binary search narrows the
   window over the sorted block-column indices of block row `brow`, then a
   linear scan finds (or creates, subject to a->nonew) the bs x bs block and
   writes the scalar at column-major offset bs*cidx + ridx within it.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
184 
/*
   MatSetValues_SeqBAIJ_B_Private - Same as MatSetValues_SeqBAIJ_A_Private but
   for the off-diagonal block B, using the b-prefixed variables (bj, bi, ba,
   bimax, bilen) declared in MatSetValues_MPIBAIJ().  (orow, ocol) are the
   original global indices, used only in error messages.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
224 
/*
   MatSetValues_MPIBAIJ - Point-wise (non-blocked) insertion of values into an
   MPIBAIJ matrix.

   Rows owned by this process go directly into the diagonal (A) or
   off-diagonal (B) sequential block via the macros above; off-process rows
   are stashed for communication at assembly time.  Negative row or column
   indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart; /* ownership range in point (not block) indices */
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Variables required by the MatSetValues_SeqBAIJ_{A,B}_Private macros */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B uses compressed column numbering after assembly, so translate
               the global column through colmap (built on demand) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores local column + 1; 0 means absent */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not present: fall back to uncompressed (disassembled)
                 form of B so new nonzeros can be inserted */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* colmap gives block start; add offset within block */
            }
          } else col = in[j]; /* before first assembly B uses global columns */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
310 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values v[] at block position (row, col) of a SeqBAIJ matrix.

   v is read row-major when a->roworiented, column-major otherwise; the stored
   block is column-major.  (orow, ocol) are the caller's original block
   indices, used only in error messages.  A binary search narrows the window
   over the sorted block-column indices of block row `row`, then a linear scan
   finds (or creates, subject to a->nonew) the target block.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* blocks currently used in this row */
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search until the remaining window is small, then scan linearly */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: add/insert, transposing when input is row-oriented */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new nonzero locations */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
387 
388 /*
389     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
390     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
391 */
392 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
393 {
394   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
395   const PetscScalar *value;
396   MatScalar         *barray      = baij->barray;
397   PetscBool          roworiented = baij->roworiented;
398   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
399   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
400   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
401 
402   PetscFunctionBegin;
403   if (!barray) {
404     PetscCall(PetscMalloc1(bs2, &barray));
405     baij->barray = barray;
406   }
407 
408   if (roworiented) stepval = (n - 1) * bs;
409   else stepval = (m - 1) * bs;
410 
411   for (i = 0; i < m; i++) {
412     if (im[i] < 0) continue;
413     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
414     if (im[i] >= rstart && im[i] < rend) {
415       row = im[i] - rstart;
416       for (j = 0; j < n; j++) {
417         /* If NumCol = 1 then a copy is not required */
418         if ((roworiented) && (n == 1)) {
419           barray = (MatScalar *)v + i * bs2;
420         } else if ((!roworiented) && (m == 1)) {
421           barray = (MatScalar *)v + j * bs2;
422         } else { /* Here a copy is required */
423           if (roworiented) {
424             value = v + (i * (stepval + bs) + j) * bs;
425           } else {
426             value = v + (j * (stepval + bs) + i) * bs;
427           }
428           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
429             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
430             barray += bs;
431           }
432           barray -= bs2;
433         }
434 
435         if (in[j] >= cstart && in[j] < cend) {
436           col = in[j] - cstart;
437           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
438         } else if (in[j] < 0) {
439           continue;
440         } else {
441           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
442           if (mat->was_assembled) {
443             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
444 
445 #if defined(PETSC_USE_DEBUG)
446   #if defined(PETSC_USE_CTABLE)
447             {
448               PetscInt data;
449               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
450               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
451             }
452   #else
453             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
454   #endif
455 #endif
456 #if defined(PETSC_USE_CTABLE)
457             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
458             col = (col - 1) / bs;
459 #else
460             col = (baij->colmap[in[j]] - 1) / bs;
461 #endif
462             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
463               PetscCall(MatDisAssemble_MPIBAIJ(mat));
464               col = in[j];
465             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
466           } else col = in[j];
467           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
468         }
469       }
470     } else {
471       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
472       if (!baij->donotstash) {
473         if (roworiented) {
474           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         } else {
476           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
477         }
478       }
479     }
480   }
481   PetscFunctionReturn(PETSC_SUCCESS);
482 }
483 
484 #define HASH_KEY             0.6180339887
485 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
486 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
487 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Point-wise insertion using the precomputed hash
   table (baij->ht maps block (row,col) keys to value pointers in baij->hd).

   Lookup-only: a location that is not already present in the hash table is an
   error (see the PetscCheck below), so no new nonzeros can be created here.
   Off-process rows are stashed as in MatSetValues_MPIBAIJ().  In debug builds
   the probe counts are accumulated into ht_total_ct / ht_insert_ct.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table: key encodes the global block (row,col);
           +1 so that key 0 can mean "empty slot" */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same linear probing as below, but counting probes for statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap around to h1 */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
556 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed insertion using the
   precomputed hash table (baij->ht maps block (row,col) keys to pointers to
   the bs x bs value blocks in baij->hd).

   Lookup-only: a block location not already in the hash table is an error, so
   no new nonzero blocks can be created here.  Off-process block rows are
   stashed.  Stored blocks are column-major; row-oriented input is transposed
   on the fly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units but rend is taken from the point
     row map (mat->rmap->rend) while row below is a block index — looks
     inconsistent; confirm against baij->rendbs */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* distance (in scalars) between consecutive rows of one block within v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of this block row's values (row-oriented case) */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table: key encodes the global block (row,col);
           +1 so that key 0 can mean "empty slot" */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same linear probing as below, but counting probes for statistics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap around to h1 */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* pointer to the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
655 
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n dense block of values v[] (row
   major) at global point positions (idxm[], idxn[]).

   Only locally owned rows are supported (off-process rows raise an error);
   negative row/column indices are skipped.  Locations not stored in the
   off-diagonal part return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column through colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores local point column + 1; 0 (now -1) means absent */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* verify the mapped block really corresponds to this global block
             column; otherwise the entry is not stored and reads as zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
693 
694 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
695 {
696   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
697   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
698   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
699   PetscReal    sum = 0.0;
700   MatScalar   *v;
701 
702   PetscFunctionBegin;
703   if (baij->size == 1) {
704     PetscCall(MatNorm(baij->A, type, nrm));
705   } else {
706     if (type == NORM_FROBENIUS) {
707       v  = amat->a;
708       nz = amat->nz * bs2;
709       for (i = 0; i < nz; i++) {
710         sum += PetscRealPart(PetscConj(*v) * (*v));
711         v++;
712       }
713       v  = bmat->a;
714       nz = bmat->nz * bs2;
715       for (i = 0; i < nz; i++) {
716         sum += PetscRealPart(PetscConj(*v) * (*v));
717         v++;
718       }
719       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
720       *nrm = PetscSqrtReal(*nrm);
721     } else if (type == NORM_1) { /* max column sum */
722       PetscReal *tmp, *tmp2;
723       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
724       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
725       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
726       v  = amat->a;
727       jj = amat->j;
728       for (i = 0; i < amat->nz; i++) {
729         for (j = 0; j < bs; j++) {
730           col = bs * (cstart + *jj) + j; /* column index */
731           for (row = 0; row < bs; row++) {
732             tmp[col] += PetscAbsScalar(*v);
733             v++;
734           }
735         }
736         jj++;
737       }
738       v  = bmat->a;
739       jj = bmat->j;
740       for (i = 0; i < bmat->nz; i++) {
741         for (j = 0; j < bs; j++) {
742           col = bs * garray[*jj] + j;
743           for (row = 0; row < bs; row++) {
744             tmp[col] += PetscAbsScalar(*v);
745             v++;
746           }
747         }
748         jj++;
749       }
750       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
751       *nrm = 0.0;
752       for (j = 0; j < mat->cmap->N; j++) {
753         if (tmp2[j] > *nrm) *nrm = tmp2[j];
754       }
755       PetscCall(PetscFree(tmp));
756       PetscCall(PetscFree(tmp2));
757     } else if (type == NORM_INFINITY) { /* max row sum */
758       PetscReal *sums;
759       PetscCall(PetscMalloc1(bs, &sums));
760       sum = 0.0;
761       for (j = 0; j < amat->mbs; j++) {
762         for (row = 0; row < bs; row++) sums[row] = 0.0;
763         v  = amat->a + bs2 * amat->i[j];
764         nz = amat->i[j + 1] - amat->i[j];
765         for (i = 0; i < nz; i++) {
766           for (col = 0; col < bs; col++) {
767             for (row = 0; row < bs; row++) {
768               sums[row] += PetscAbsScalar(*v);
769               v++;
770             }
771           }
772         }
773         v  = bmat->a + bs2 * bmat->i[j];
774         nz = bmat->i[j + 1] - bmat->i[j];
775         for (i = 0; i < nz; i++) {
776           for (col = 0; col < bs; col++) {
777             for (row = 0; row < bs; row++) {
778               sums[row] += PetscAbsScalar(*v);
779               v++;
780             }
781           }
782         }
783         for (row = 0; row < bs; row++) {
784           if (sums[row] > sum) sum = sums[row];
785         }
786       }
787       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
788       PetscCall(PetscFree(sums));
789     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
790   }
791   PetscFunctionReturn(PETSC_SUCCESS);
792 }
793 
794 /*
795   Creates the hash table, and sets the table
796   This table is created only once.
797   If new entried need to be added to the matrix
798   then the hash table has to be destroyed and
799   recreated.
800 */
801 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
802 {
803   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
804   Mat          A = baij->A, B = baij->B;
805   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
806   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
807   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
808   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
809   PetscInt    *HT, key;
810   MatScalar  **HD;
811   PetscReal    tmp;
812 #if defined(PETSC_USE_INFO)
813   PetscInt ct = 0, max = 0;
814 #endif
815 
816   PetscFunctionBegin;
817   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
818 
819   baij->ht_size = (PetscInt)(factor * nz);
820   ht_size       = baij->ht_size;
821 
822   /* Allocate Memory for Hash Table */
823   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
824   HD = baij->hd;
825   HT = baij->ht;
826 
827   /* Loop Over A */
828   for (i = 0; i < a->mbs; i++) {
829     for (j = ai[i]; j < ai[i + 1]; j++) {
830       row = i + rstart;
831       col = aj[j] + cstart;
832 
833       key = row * Nbs + col + 1;
834       h1  = HASH(ht_size, key, tmp);
835       for (k = 0; k < ht_size; k++) {
836         if (!HT[(h1 + k) % ht_size]) {
837           HT[(h1 + k) % ht_size] = key;
838           HD[(h1 + k) % ht_size] = a->a + j * bs2;
839           break;
840 #if defined(PETSC_USE_INFO)
841         } else {
842           ct++;
843 #endif
844         }
845       }
846 #if defined(PETSC_USE_INFO)
847       if (k > max) max = k;
848 #endif
849     }
850   }
851   /* Loop Over B */
852   for (i = 0; i < b->mbs; i++) {
853     for (j = bi[i]; j < bi[i + 1]; j++) {
854       row = i + rstart;
855       col = garray[bj[j]];
856       key = row * Nbs + col + 1;
857       h1  = HASH(ht_size, key, tmp);
858       for (k = 0; k < ht_size; k++) {
859         if (!HT[(h1 + k) % ht_size]) {
860           HT[(h1 + k) % ht_size] = key;
861           HD[(h1 + k) % ht_size] = b->a + j * bs2;
862           break;
863 #if defined(PETSC_USE_INFO)
864         } else {
865           ct++;
866 #endif
867         }
868       }
869 #if defined(PETSC_USE_INFO)
870       if (k > max) max = k;
871 #endif
872     }
873   }
874 
875   /* Print Summary */
876 #if defined(PETSC_USE_INFO)
877   for (i = 0, j = 0; i < ht_size; i++) {
878     if (HT[i]) j++;
879   }
880   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
881 #endif
882   PetscFunctionReturn(PETSC_SUCCESS);
883 }
884 
885 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
886 {
887   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
888   PetscInt     nstash, reallocs;
889 
890   PetscFunctionBegin;
891   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
892 
893   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
894   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
895   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
898   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
899   PetscFunctionReturn(PETSC_SUCCESS);
900 }
901 
902 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
903 {
904   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
905   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
906   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
907   PetscInt    *row, *col;
908   PetscBool    r1, r2, r3, other_disassembled;
909   MatScalar   *val;
910   PetscMPIInt  n;
911 
912   PetscFunctionBegin;
913   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
914   if (!baij->donotstash && !mat->nooffprocentries) {
915     while (1) {
916       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
917       if (!flg) break;
918 
919       for (i = 0; i < n;) {
920         /* Now identify the consecutive vals belonging to the same row */
921         for (j = i, rstart = row[j]; j < n; j++) {
922           if (row[j] != rstart) break;
923         }
924         if (j < n) ncols = j - i;
925         else ncols = n - i;
926         /* Now assemble all these values with a single function call */
927         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
928         i = j;
929       }
930     }
931     PetscCall(MatStashScatterEnd_Private(&mat->stash));
932     /* Now process the block-stash. Since the values are stashed column-oriented,
933        set the roworiented flag to column oriented, and after MatSetValues()
934        restore the original flags */
935     r1 = baij->roworiented;
936     r2 = a->roworiented;
937     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
938 
939     baij->roworiented = PETSC_FALSE;
940     a->roworiented    = PETSC_FALSE;
941 
942     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
943     while (1) {
944       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
945       if (!flg) break;
946 
947       for (i = 0; i < n;) {
948         /* Now identify the consecutive vals belonging to the same row */
949         for (j = i, rstart = row[j]; j < n; j++) {
950           if (row[j] != rstart) break;
951         }
952         if (j < n) ncols = j - i;
953         else ncols = n - i;
954         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
955         i = j;
956       }
957     }
958     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
959 
960     baij->roworiented = r1;
961     a->roworiented    = r2;
962 
963     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
964   }
965 
966   PetscCall(MatAssemblyBegin(baij->A, mode));
967   PetscCall(MatAssemblyEnd(baij->A, mode));
968 
969   /* determine if any processor has disassembled, if so we must
970      also disassemble ourselves, in order that we may reassemble. */
971   /*
972      if nonzero structure of submatrix B cannot change then we know that
973      no processor disassembled thus we can skip this stuff
974   */
975   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
976     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
977     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
978   }
979 
980   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
981   PetscCall(MatAssemblyBegin(baij->B, mode));
982   PetscCall(MatAssemblyEnd(baij->B, mode));
983 
984 #if defined(PETSC_USE_INFO)
985   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
986     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
987 
988     baij->ht_total_ct  = 0;
989     baij->ht_insert_ct = 0;
990   }
991 #endif
992   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
993     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
994 
995     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
996     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
997   }
998 
999   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
1000 
1001   baij->rowvalues = NULL;
1002 
1003   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1004   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
1005     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1006     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1007   }
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
1011 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1012 #include <petscdraw.h>
1013 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1014 {
1015   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1016   PetscMPIInt       rank = baij->rank;
1017   PetscInt          bs   = mat->rmap->bs;
1018   PetscBool         iascii, isdraw;
1019   PetscViewer       sviewer;
1020   PetscViewerFormat format;
1021 
1022   PetscFunctionBegin;
1023   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1024   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1025   if (iascii) {
1026     PetscCall(PetscViewerGetFormat(viewer, &format));
1027     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1028       MatInfo info;
1029       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1030       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1031       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1032       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1033                                                    mat->rmap->bs, (double)info.memory));
1034       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1035       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1036       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1037       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1038       PetscCall(PetscViewerFlush(viewer));
1039       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1040       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1041       PetscCall(VecScatterView(baij->Mvctx, viewer));
1042       PetscFunctionReturn(PETSC_SUCCESS);
1043     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1044       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1045       PetscFunctionReturn(PETSC_SUCCESS);
1046     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1047       PetscFunctionReturn(PETSC_SUCCESS);
1048     }
1049   }
1050 
1051   if (isdraw) {
1052     PetscDraw draw;
1053     PetscBool isnull;
1054     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1055     PetscCall(PetscDrawIsNull(draw, &isnull));
1056     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1057   }
1058 
1059   {
1060     /* assemble the entire matrix onto first processor. */
1061     Mat          A;
1062     Mat_SeqBAIJ *Aloc;
1063     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1064     MatScalar   *a;
1065     const char  *matname;
1066 
1067     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1068     /* Perhaps this should be the type of mat? */
1069     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1070     if (rank == 0) {
1071       PetscCall(MatSetSizes(A, M, N, M, N));
1072     } else {
1073       PetscCall(MatSetSizes(A, 0, 0, M, N));
1074     }
1075     PetscCall(MatSetType(A, MATMPIBAIJ));
1076     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1077     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1078 
1079     /* copy over the A part */
1080     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1081     ai   = Aloc->i;
1082     aj   = Aloc->j;
1083     a    = Aloc->a;
1084     PetscCall(PetscMalloc1(bs, &rvals));
1085 
1086     for (i = 0; i < mbs; i++) {
1087       rvals[0] = bs * (baij->rstartbs + i);
1088       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1089       for (j = ai[i]; j < ai[i + 1]; j++) {
1090         col = (baij->cstartbs + aj[j]) * bs;
1091         for (k = 0; k < bs; k++) {
1092           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1093           col++;
1094           a += bs;
1095         }
1096       }
1097     }
1098     /* copy over the B part */
1099     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1100     ai   = Aloc->i;
1101     aj   = Aloc->j;
1102     a    = Aloc->a;
1103     for (i = 0; i < mbs; i++) {
1104       rvals[0] = bs * (baij->rstartbs + i);
1105       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1106       for (j = ai[i]; j < ai[i + 1]; j++) {
1107         col = baij->garray[aj[j]] * bs;
1108         for (k = 0; k < bs; k++) {
1109           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1110           col++;
1111           a += bs;
1112         }
1113       }
1114     }
1115     PetscCall(PetscFree(rvals));
1116     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1117     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1118     /*
1119        Everyone has to call to draw the matrix since the graphics waits are
1120        synchronized across all processors that share the PetscDraw object
1121     */
1122     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1123     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1124     if (rank == 0) {
1125       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
1126       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
1127     }
1128     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1129     PetscCall(PetscViewerFlush(viewer));
1130     PetscCall(MatDestroy(&A));
1131   }
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1136 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1137 {
1138   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1139   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1140   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1141   const PetscInt *garray = aij->garray;
1142   PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
1143   PetscInt       *rowlens, *colidxs;
1144   PetscScalar    *matvals;
1145 
1146   PetscFunctionBegin;
1147   PetscCall(PetscViewerSetUp(viewer));
1148 
1149   M  = mat->rmap->N;
1150   N  = mat->cmap->N;
1151   m  = mat->rmap->n;
1152   rs = mat->rmap->rstart;
1153   cs = mat->cmap->rstart;
1154   bs = mat->rmap->bs;
1155   nz = bs * bs * (A->nz + B->nz);
1156 
1157   /* write matrix header */
1158   header[0] = MAT_FILE_CLASSID;
1159   header[1] = M;
1160   header[2] = N;
1161   header[3] = nz;
1162   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1163   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1164 
1165   /* fill in and store row lengths */
1166   PetscCall(PetscMalloc1(m, &rowlens));
1167   for (cnt = 0, i = 0; i < A->mbs; i++)
1168     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1169   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1170   PetscCall(PetscFree(rowlens));
1171 
1172   /* fill in and store column indices */
1173   PetscCall(PetscMalloc1(nz, &colidxs));
1174   for (cnt = 0, i = 0; i < A->mbs; i++) {
1175     for (k = 0; k < bs; k++) {
1176       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1177         if (garray[B->j[jb]] > cs / bs) break;
1178         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1179       }
1180       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1181         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1182       for (; jb < B->i[i + 1]; jb++)
1183         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1184     }
1185   }
1186   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1187   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1188   PetscCall(PetscFree(colidxs));
1189 
1190   /* fill in and store nonzero values */
1191   PetscCall(PetscMalloc1(nz, &matvals));
1192   for (cnt = 0, i = 0; i < A->mbs; i++) {
1193     for (k = 0; k < bs; k++) {
1194       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1195         if (garray[B->j[jb]] > cs / bs) break;
1196         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1197       }
1198       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1199         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1200       for (; jb < B->i[i + 1]; jb++)
1201         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1202     }
1203   }
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1205   PetscCall(PetscFree(matvals));
1206 
1207   /* write block size option to the viewer's .info file */
1208   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1209   PetscFunctionReturn(PETSC_SUCCESS);
1210 }
1211 
1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1213 {
1214   PetscBool iascii, isdraw, issocket, isbinary;
1215 
1216   PetscFunctionBegin;
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1220   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1221   if (iascii || isdraw || issocket) {
1222     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1223   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1228 {
1229   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1230   PetscInt     nt;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(VecGetLocalSize(xx, &nt));
1234   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1235   PetscCall(VecGetLocalSize(yy, &nt));
1236   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1237   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1238   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1239   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1240   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1245 {
1246   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1247 
1248   PetscFunctionBegin;
1249   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1251   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1252   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1257 {
1258   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1259 
1260   PetscFunctionBegin;
1261   /* do nondiagonal part */
1262   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1263   /* do local part */
1264   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1265   /* add partial results together */
1266   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1267   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1272 {
1273   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1274 
1275   PetscFunctionBegin;
1276   /* do nondiagonal part */
1277   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1278   /* do local part */
1279   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1280   /* add partial results together */
1281   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1282   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1283   PetscFunctionReturn(PETSC_SUCCESS);
1284 }
1285 
1286 /*
1287   This only works correctly for square matrices where the subblock A->A is the
1288    diagonal block
1289 */
1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1291 {
1292   PetscFunctionBegin;
1293   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1294   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1299 {
1300   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1301 
1302   PetscFunctionBegin;
1303   PetscCall(MatScale(a->A, aa));
1304   PetscCall(MatScale(a->B, aa));
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307 
1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1309 {
1310   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1311   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1312   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1313   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1314   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1315 
1316   PetscFunctionBegin;
1317   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1318   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1319   mat->getrowactive = PETSC_TRUE;
1320 
1321   if (!mat->rowvalues && (idx || v)) {
1322     /*
1323         allocate enough space to hold information from the longest row.
1324     */
1325     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1326     PetscInt     max = 1, mbs = mat->mbs, tmp;
1327     for (i = 0; i < mbs; i++) {
1328       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1329       if (max < tmp) max = tmp;
1330     }
1331     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1332   }
1333   lrow = row - brstart;
1334 
1335   pvA = &vworkA;
1336   pcA = &cworkA;
1337   pvB = &vworkB;
1338   pcB = &cworkB;
1339   if (!v) {
1340     pvA = NULL;
1341     pvB = NULL;
1342   }
1343   if (!idx) {
1344     pcA = NULL;
1345     if (!v) pcB = NULL;
1346   }
1347   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1348   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1349   nztot = nzA + nzB;
1350 
1351   cmap = mat->garray;
1352   if (v || idx) {
1353     if (nztot) {
1354       /* Sort by increasing column numbers, assuming A and B already sorted */
1355       PetscInt imark = -1;
1356       if (v) {
1357         *v = v_p = mat->rowvalues;
1358         for (i = 0; i < nzB; i++) {
1359           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1360           else break;
1361         }
1362         imark = i;
1363         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1364         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1365       }
1366       if (idx) {
1367         *idx = idx_p = mat->rowindices;
1368         if (imark > -1) {
1369           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1370         } else {
1371           for (i = 0; i < nzB; i++) {
1372             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1373             else break;
1374           }
1375           imark = i;
1376         }
1377         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1378         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1379       }
1380     } else {
1381       if (idx) *idx = NULL;
1382       if (v) *v = NULL;
1383     }
1384   }
1385   *nz = nztot;
1386   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1387   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1397   baij->getrowactive = PETSC_FALSE;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1402 {
1403   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatZeroEntries(l->A));
1407   PetscCall(MatZeroEntries(l->B));
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1412 {
1413   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1414   Mat            A = a->A, B = a->B;
1415   PetscLogDouble isend[5], irecv[5];
1416 
1417   PetscFunctionBegin;
1418   info->block_size = (PetscReal)matin->rmap->bs;
1419 
1420   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1421 
1422   isend[0] = info->nz_used;
1423   isend[1] = info->nz_allocated;
1424   isend[2] = info->nz_unneeded;
1425   isend[3] = info->memory;
1426   isend[4] = info->mallocs;
1427 
1428   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1429 
1430   isend[0] += info->nz_used;
1431   isend[1] += info->nz_allocated;
1432   isend[2] += info->nz_unneeded;
1433   isend[3] += info->memory;
1434   isend[4] += info->mallocs;
1435 
1436   if (flag == MAT_LOCAL) {
1437     info->nz_used      = isend[0];
1438     info->nz_allocated = isend[1];
1439     info->nz_unneeded  = isend[2];
1440     info->memory       = isend[3];
1441     info->mallocs      = isend[4];
1442   } else if (flag == MAT_GLOBAL_MAX) {
1443     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else if (flag == MAT_GLOBAL_SUM) {
1451     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
/*
  MatSetOption_MPIBAIJ - Sets an option on a parallel block AIJ matrix.

  Nonzero-pattern and orientation options are forwarded to both the diagonal
  (a->A) and off-diagonal (a->B) sequential blocks; options that concern only
  the parallel layer (stashing, hash table) are recorded in the Mat_MPIBAIJ
  context; symmetry-type options are accepted here without local bookkeeping.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pattern-related options apply to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remembered locally for MatSetValues and also forwarded to the blocks */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, values destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table size factor; presumably adjustable via MatSetHashTableFactor() */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1512 
/*
  MatTranspose_MPIBAIJ - Forms the transpose of a parallel block AIJ matrix.

  The transpose is built by re-inserting every entry of the local diagonal
  (baij->A) and off-diagonal (baij->B) blocks into a new parallel matrix with
  row and column roles swapped; communication is left to the generic
  MatSetValues/MatAssembly machinery.  For MAT_INPLACE_MATRIX the freshly
  assembled matrix replaces A via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals holds the bs global point-row indices of one block row; they become
     column indices of the transpose */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global point column of the first column in this block */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* 'a' advances bs values per insert: one slice of the bs x bs block becomes
           one row of B (assumes SeqBAIJ blocks are stored column-major -- verify) */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps the compressed off-diagonal block column to a global block column */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
/*
  MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).

  ll is distributed like the rows of mat and rr like the columns.  The scatter
  of rr into the ghosted vector baij->lvec is started before any local work so
  communication overlaps with scaling of the locally available parts; the
  off-diagonal block is right-scaled only after the scatter completes.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block with the locally owned part of ll */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1609 
/*
  MatZeroRows_MPIBAIJ - Zeros the locally owned subset of the given global
  rows, optionally placing 'diag' on the diagonal.  If both x and b are
  supplied, b is adjusted on the zeroed rows so x remains a solution there.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i on each zeroed local row, so x still satisfies row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the SeqBAIJ kernel sets the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* incongruent layouts: zero first, then insert the diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1667 
/*
  MatZeroRowsColumns_MPIBAIJ - Zeros the given global rows AND the matching
  columns, optionally placing 'diag' on the diagonal.  The diagonal block is
  handled by MatZeroRowsColumns() on l->A (which also fixes b from x there);
  this routine then zeroes the affected rows/columns of the off-diagonal
  block by direct surgery on its value array and, when x is given, subtracts
  the eliminated column contributions from b.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed"; any reduced-in row index is >= 0 */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark the eliminated local rows with 1 and scatter the marks into the
     ghosted layout so every rank sees which ghost columns drop out */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* count = number of scalar entries of point row 'row' inside this block row */
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    /* stride bs: touch the (row % bs)-th point row inside each stored bs x bs block */
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the eliminated column's contribution to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1763 
1764 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatSetUnfactored(a->A));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1774 
1775 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1776 {
1777   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1778   Mat          a, b, c, d;
1779   PetscBool    flg;
1780 
1781   PetscFunctionBegin;
1782   a = matA->A;
1783   b = matA->B;
1784   c = matB->A;
1785   d = matB->B;
1786 
1787   PetscCall(MatEqual(a, c, &flg));
1788   if (flg) PetscCall(MatEqual(b, d, &flg));
1789   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1794 {
1795   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1796   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     PetscCall(MatCopy_Basic(A, B, str));
1802   } else {
1803     PetscCall(MatCopy(a->A, b->A, str));
1804     PetscCall(MatCopy(a->B, b->B, str));
1805   }
1806   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1811 {
1812   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1813   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1814   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
/*
  MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

  Three cases:
  - SAME_NONZERO_PATTERN: the stored value arrays of X and Y line up entry for
    entry, so one BLAS axpy per (diagonal / off-diagonal) block suffices.
  - SUBSET_NONZERO_PATTERN: defer to the generic MatAXPY_Basic() which inserts
    X's entries into Y.
  - otherwise: compute a merged preallocation, accumulate a*X + Y into a new
    matrix B, and replace Y by B via MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts bs x bs blocks, hence the factor bs2 to get the scalar count */
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    /* merged per-row counts for the diagonal and off-diagonal blocks */
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1863 
1864 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1865 {
1866   PetscFunctionBegin;
1867   if (PetscDefined(USE_COMPLEX)) {
1868     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1869 
1870     PetscCall(MatConjugate_SeqBAIJ(a->A));
1871     PetscCall(MatConjugate_SeqBAIJ(a->B));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatRealPart(a->A));
1882   PetscCall(MatRealPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1887 {
1888   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatImaginaryPart(a->A));
1892   PetscCall(MatImaginaryPart(a->B));
1893   PetscFunctionReturn(PETSC_SUCCESS);
1894 }
1895 
1896 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1897 {
1898   IS       iscol_local;
1899   PetscInt csize;
1900 
1901   PetscFunctionBegin;
1902   PetscCall(ISGetLocalSize(iscol, &csize));
1903   if (call == MAT_REUSE_MATRIX) {
1904     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1905     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1906   } else {
1907     PetscCall(ISAllGather(iscol, &iscol_local));
1908   }
1909   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1910   if (call == MAT_INITIAL_MATRIX) {
1911     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1912     PetscCall(ISDestroy(&iscol_local));
1913   }
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 /*
1918   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1919   in local and then by concatenating the local matrices the end result.
1920   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1921   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1922 */
/*
  MatCreateSubMatrix_MPIBAIJ_Private - Extracts the parallel submatrix
  mat(isrow,iscol).

  Each rank first gathers its full rows of the submatrix into a sequential
  BAIJ matrix (Mreuse); the parallel result is then assembled by inserting
  those local rows into a new parallel matrix.  csize is the requested local
  column size of the result (PETSC_DECIDE to let the routine choose).  On
  MAT_REUSE_MATRIX the sequential matrix cached under the key "SubMatrix" on
  *newmat is refilled instead of rebuilt.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided: it does not report
     errors and may change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* from here on m and n count BLOCK rows/columns */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns in [rstart,rend) belong to the diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* both calls are issued since this routine serves BAIJ and SBAIJ; only the
       one matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    /* NOTE(review): ml is in point rows while m was divided by bs above, so this
       check looks like it can only pass for bs == 1 -- verify intent */
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    /* cwork/vwork point at the block-column indices and block values of row i */
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2048 
2049 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2050 {
2051   MPI_Comm        comm, pcomm;
2052   PetscInt        clocal_size, nrows;
2053   const PetscInt *rows;
2054   PetscMPIInt     size;
2055   IS              crowp, lcolp;
2056 
2057   PetscFunctionBegin;
2058   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2059   /* make a collective version of 'rowp' */
2060   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2061   if (pcomm == comm) {
2062     crowp = rowp;
2063   } else {
2064     PetscCall(ISGetSize(rowp, &nrows));
2065     PetscCall(ISGetIndices(rowp, &rows));
2066     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2067     PetscCall(ISRestoreIndices(rowp, &rows));
2068   }
2069   PetscCall(ISSetPermutation(crowp));
2070   /* make a local version of 'colp' */
2071   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2072   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2073   if (size == 1) {
2074     lcolp = colp;
2075   } else {
2076     PetscCall(ISAllGather(colp, &lcolp));
2077   }
2078   PetscCall(ISSetPermutation(lcolp));
2079   /* now we just get the submatrix */
2080   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2081   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2082   /* clean up */
2083   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2084   if (size > 1) PetscCall(ISDestroy(&lcolp));
2085   PetscFunctionReturn(PETSC_SUCCESS);
2086 }
2087 
2088 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2089 {
2090   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2091   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2092 
2093   PetscFunctionBegin;
2094   if (nghosts) *nghosts = B->nbs;
2095   if (ghosts) *ghosts = baij->garray;
2096   PetscFunctionReturn(PETSC_SUCCESS);
2097 }
2098 
/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Builds, redundantly on every rank, a
  sequential AIJ matrix whose pattern is the block-level nonzero structure of
  the whole parallel matrix (each bs x bs block collapsed to one entry).
  Only the structure is meaningful; numerical values are never set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  /* local block-row length = (diagonal block nonzeros) + (off-diagonal block nonzeros) */
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation serves both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  /* in-place allgather: every rank ends up with the full lens[] array */
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's j array at the offset of this rank's rows */
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge the diagonal and off-diagonal column lists so each row comes out
     in globally sorted order: below-diagonal ghosts, diagonal part, above-diagonal ghosts */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2188 
/*
  MatSOR_MPIBAIJ - Applies processor-local SOR relaxation.

  Each outer iteration refreshes the ghost values of xx, folds the
  off-process coupling into the right-hand side (bb1 = bb - B*xx) and then
  runs the sequential SOR kernel of the diagonal block with bb1.  Only the
  SOR_LOCAL_* variants and SOR_APPLY_UPPER are supported; a true parallel
  sweep is not implemented.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is only needed when some iteration starts from a nonzero xx */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess no ghost update is required: use bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2256 
2257 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2258 {
2259   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2260   PetscInt     m, N, i, *garray = aij->garray;
2261   PetscInt     ib, jb, bs = A->rmap->bs;
2262   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2263   MatScalar   *a_val = a_aij->a;
2264   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2265   MatScalar   *b_val = b_aij->a;
2266   PetscReal   *work;
2267 
2268   PetscFunctionBegin;
2269   PetscCall(MatGetSize(A, &m, &N));
2270   PetscCall(PetscCalloc1(N, &work));
2271   if (type == NORM_2) {
2272     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2273       for (jb = 0; jb < bs; jb++) {
2274         for (ib = 0; ib < bs; ib++) {
2275           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2276           a_val++;
2277         }
2278       }
2279     }
2280     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2281       for (jb = 0; jb < bs; jb++) {
2282         for (ib = 0; ib < bs; ib++) {
2283           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2284           b_val++;
2285         }
2286       }
2287     }
2288   } else if (type == NORM_1) {
2289     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2290       for (jb = 0; jb < bs; jb++) {
2291         for (ib = 0; ib < bs; ib++) {
2292           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2293           a_val++;
2294         }
2295       }
2296     }
2297     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
2300           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2301           b_val++;
2302         }
2303       }
2304     }
2305   } else if (type == NORM_INFINITY) {
2306     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2307       for (jb = 0; jb < bs; jb++) {
2308         for (ib = 0; ib < bs; ib++) {
2309           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2310           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2311           a_val++;
2312         }
2313       }
2314     }
2315     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2316       for (jb = 0; jb < bs; jb++) {
2317         for (ib = 0; ib < bs; ib++) {
2318           int col   = garray[b_aij->j[i]] * bs + jb;
2319           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2320           b_val++;
2321         }
2322       }
2323     }
2324   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2325     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2326       for (jb = 0; jb < bs; jb++) {
2327         for (ib = 0; ib < bs; ib++) {
2328           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2329           a_val++;
2330         }
2331       }
2332     }
2333     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2334       for (jb = 0; jb < bs; jb++) {
2335         for (ib = 0; ib < bs; ib++) {
2336           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2337           b_val++;
2338         }
2339       }
2340     }
2341   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2342     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2343       for (jb = 0; jb < bs; jb++) {
2344         for (ib = 0; ib < bs; ib++) {
2345           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2346           a_val++;
2347         }
2348       }
2349     }
2350     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2351       for (jb = 0; jb < bs; jb++) {
2352         for (ib = 0; ib < bs; ib++) {
2353           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2354           b_val++;
2355         }
2356       }
2357     }
2358   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2359   if (type == NORM_INFINITY) {
2360     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2361   } else {
2362     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2363   }
2364   PetscCall(PetscFree(work));
2365   if (type == NORM_2) {
2366     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2367   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2368     for (i = 0; i < N; i++) reductions[i] /= m;
2369   }
2370   PetscFunctionReturn(PETSC_SUCCESS);
2371 }
2372 
2373 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2374 {
2375   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2376 
2377   PetscFunctionBegin;
2378   PetscCall(MatInvertBlockDiagonal(a->A, values));
2379   A->factorerrortype             = a->A->factorerrortype;
2380   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2381   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2382   PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384 
2385 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2386 {
2387   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2388   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2389 
2390   PetscFunctionBegin;
2391   if (!Y->preallocated) {
2392     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2393   } else if (!aij->nz) {
2394     PetscInt nonew = aij->nonew;
2395     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2396     aij->nonew = nonew;
2397   }
2398   PetscCall(MatShift_Basic(Y, a));
2399   PetscFunctionReturn(PETSC_SUCCESS);
2400 }
2401 
2402 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2403 {
2404   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2405 
2406   PetscFunctionBegin;
2407   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2408   PetscCall(MatMissingDiagonal(a->A, missing, d));
2409   if (d) {
2410     PetscInt rstart;
2411     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2412     *d += rstart / A->rmap->bs;
2413   }
2414   PetscFunctionReturn(PETSC_SUCCESS);
2415 }
2416 
2417 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2418 {
2419   PetscFunctionBegin;
2420   *a = ((Mat_MPIBAIJ *)A->data)->A;
2421   PetscFunctionReturn(PETSC_SUCCESS);
2422 }
2423 
2424 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIBAIJ.  The initializer is positional: the
   numbered comments give the slot index into struct _MatOps (declared in
   petsc/private/matimpl.h), and a NULL slot means the operation is not
   implemented by this type.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
2577 
2578 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2579 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2580 
/* Implementation of MatMPIBAIJSetPreallocationCSR() for MATMPIBAIJ.

   B  - the matrix
   bs - the block size (may be adjusted when the row layout is set up)
   ii - block-row pointers into jj; ii[0] must be 0, length = (local block rows)+1
   jj - global block-column indices for each local block row
   V  - optional numerical values, bs*bs scalars per block in jj; may be NULL

   Counts diagonal/off-diagonal blocks per row, preallocates, inserts the values
   (zeros when V is NULL, so the nonzero pattern is still established), and
   assembles the matrix with the exact-pattern option set. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout setup may have changed bs; re-read it */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all quantities below are in block (not scalar) units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row; sizes the zero-value buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* classify each block of this row as diagonal (locally owned column) or off-diagonal */
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* with no values supplied, insert zeros so the nonzero locations are still created */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all insertions above target locally owned rows, so assembly can skip the off-process stash */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact: any later insertion outside it should error */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2649 
2650 /*@C
2651    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2652 
2653    Collective
2654 
2655    Input Parameters:
2656 +  B - the matrix
2657 .  bs - the block size
2658 .  i - the indices into j for the start of each local row (starts with zero)
2659 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2660 -  v - optional values in the matrix
2661 
2662    Level: advanced
2663 
2664    Notes:
2665     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2666    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2667    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2668    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2669    block column and the second index is over columns within a block.
2670 
2671    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2672 
2673 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2674 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation registered under this name;
     PetscTryMethod is a no-op for matrix types that do not provide one */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2684 
2685 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2686 {
2687   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2688   PetscInt     i;
2689   PetscMPIInt  size;
2690 
2691   PetscFunctionBegin;
2692   if (B->hash_active) {
2693     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
2694     B->hash_active = PETSC_FALSE;
2695   }
2696   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2697   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2698   PetscCall(PetscLayoutSetUp(B->rmap));
2699   PetscCall(PetscLayoutSetUp(B->cmap));
2700   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2701 
2702   if (d_nnz) {
2703     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2704   }
2705   if (o_nnz) {
2706     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2707   }
2708 
2709   b->bs2 = bs * bs;
2710   b->mbs = B->rmap->n / bs;
2711   b->nbs = B->cmap->n / bs;
2712   b->Mbs = B->rmap->N / bs;
2713   b->Nbs = B->cmap->N / bs;
2714 
2715   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2716   b->rstartbs = B->rmap->rstart / bs;
2717   b->rendbs   = B->rmap->rend / bs;
2718   b->cstartbs = B->cmap->rstart / bs;
2719   b->cendbs   = B->cmap->rend / bs;
2720 
2721 #if defined(PETSC_USE_CTABLE)
2722   PetscCall(PetscHMapIDestroy(&b->colmap));
2723 #else
2724   PetscCall(PetscFree(b->colmap));
2725 #endif
2726   PetscCall(PetscFree(b->garray));
2727   PetscCall(VecDestroy(&b->lvec));
2728   PetscCall(VecScatterDestroy(&b->Mvctx));
2729 
2730   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2731   PetscCall(MatDestroy(&b->B));
2732   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2733   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2734   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2735 
2736   PetscCall(MatDestroy(&b->A));
2737   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2738   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2739   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2740 
2741   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2742   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2743   B->preallocated  = PETSC_TRUE;
2744   B->was_assembled = PETSC_FALSE;
2745   B->assembled     = PETSC_FALSE;
2746   PetscFunctionReturn(PETSC_SUCCESS);
2747 }
2748 
2749 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2750 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2751 
/* Converts a MATMPIBAIJ matrix to MATMPIADJ (adjacency-graph form), one vertex
   per block row/column.  Diagonal entries are dropped, since MPIAdj represents
   a graph without self-edges.  The ii/jj arrays built here are handed to
   MatCreateMPIAdj(), which takes ownership of them. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row lengths = diagonal-part + off-diagonal-part entries, minus the diagonal entry */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from the count when this block row contains a diagonal entry */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* second pass: emit columns in ascending global block order — off-diagonal
     blocks left of the owned range, then the local (diagonal) part minus the
     diagonal entry, then the remaining off-diagonal blocks */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]]; /* garray maps local off-diagonal columns to global block columns */
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal entry */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2789 
2790 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2791 
2792 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2793 
/* Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting its sequential
   diagonal part (a->A) and off-diagonal part (a->B) individually.
   Handles MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX;
   the source matrix must already be assembled. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* fresh target: empty preallocation since the parts are replaced wholesale below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* save the symmetry flags: disassembling and reassembling A below may reset them */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* expand a->B to use global column numbering so its conversion is direct */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    /* reassemble A so it is left in a usable state after being disassembled */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's, destroying the old BAIJ data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2842 
2843 /*MC
2844    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2845 
2846    Options Database Keys:
2847 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2848 . -mat_block_size <bs> - set the blocksize used to store the matrix
2849 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2850 - -mat_use_hash_table <fact> - set hash table factor
2851 
2852    Level: beginner
2853 
2854    Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2857 
.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2859 M*/
2860 
2861 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2862 
/* Constructor for MATMPIBAIJ: allocates and zero-fills the type-specific data,
   installs the operations table, and registers the composed (queried-by-name)
   methods.  The matrix is not usable until a preallocation routine
   (e.g. MatMPIBAIJSetPreallocation()) is called. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships (filled in block units at preallocation) */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register conversion routines and type-specific methods, dispatched by name
     through PetscObjectQueryFunction()/PetscTryMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  /* optionally enable hash-table-based insertion via -mat_use_hash_table [factor] */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2942 
2943 /*MC
2944    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2945 
2946    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2947    and `MATMPIBAIJ` otherwise.
2948 
2949    Options Database Keys:
2950 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2951 
2952   Level: beginner
2953 
2954 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2955 M*/
2956 
2957 /*@C
2958    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2959    (block compressed row).
2960 
2961    Collective
2962 
2963    Input Parameters:
2964 +  B - the matrix
2965 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2966           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2967 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2968            submatrix  (same for all local rows)
2969 .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2971            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2972            set it even if it is zero.
2973 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2974            submatrix (same for all local rows).
2975 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2976            off-diagonal portion of the local submatrix (possibly different for
2977            each block row) or `NULL`.
2978 
2979    If the *_nnz parameter is given then the *_nz parameter is ignored
2980 
2981    Options Database Keys:
2982 +   -mat_block_size - size of the blocks to use
2983 -   -mat_use_hash_table <fact> - set hash table factor
2984 
2985    Level: intermediate
2986 
2987    Notes:
2988    For good matrix assembly performance
2989    the user should preallocate the matrix storage by setting the parameters
2990    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2991    performance can be increased by more than a factor of 50.
2992 
2993    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2995 
2996    Storage Information:
2997    For a square global matrix we define each processor's diagonal portion
2998    to be its local rows and the corresponding columns (a square submatrix);
2999    each processor's off-diagonal portion encompasses the remainder of the
3000    local matrix (a rectangular submatrix).
3001 
3002    The user can specify preallocated storage for the diagonal part of
3003    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3004    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3005    memory allocation.  Likewise, specify preallocated storage for the
3006    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3007 
3008    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3009    the figure below we depict these three local rows and all columns (0-11).
3010 
3011 .vb
3012            0 1 2 3 4 5 6 7 8 9 10 11
3013           --------------------------
3014    row 3  |o o o d d d o o o o  o  o
3015    row 4  |o o o d d d o o o o  o  o
3016    row 5  |o o o d d d o o o o  o  o
3017           --------------------------
3018 .ve
3019 
3020    Thus, any entries in the d locations are stored in the d (diagonal)
3021    submatrix, and any entries in the o locations are stored in the
3022    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3023    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3024 
3025    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3026    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3027    In general, for PDE problems in which most nonzeros are near the diagonal,
3028    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3029    or you will get TERRIBLE performance; see the users' manual chapter on
3030    matrices.
3031 
3032    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3033    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3034    You can also run with the option `-info` and look for messages with the string
3035    malloc in them to see if additional memory allocation was needed.
3036 
3037 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3038 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* Validate the matrix header and require that a matrix type has already been set */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* bs is logically collective: it must be the same on every rank of B's communicator */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the type-specific implementation if the type provides one (no-op otherwise) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3048 
3049 /*@C
3050    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3051    (block compressed row).
3052 
3053    Collective
3054 
3055    Input Parameters:
3056 +  comm - MPI communicator
3057 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3058           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3059 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3060            This value should be the same as the local size used in creating the
3061            y vector for the matrix-vector product y = Ax.
3062 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3063            This value should be the same as the local size used in creating the
3064            x vector for the matrix-vector product y = Ax.
3065 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3066 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3067 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3068            submatrix  (same for all local rows)
3069 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
3071            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3072            and set it even if it is zero.
3073 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3074            submatrix (same for all local rows).
3075 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3076            off-diagonal portion of the local submatrix (possibly different for
3077            each block row) or NULL.
3078 
3079    Output Parameter:
3080 .  A - the matrix
3081 
3082    Options Database Keys:
3083 +   -mat_block_size - size of the blocks to use
3084 -   -mat_use_hash_table <fact> - set hash table factor
3085 
3086    Level: intermediate
3087 
3088    Notes:
3089    For good matrix assembly performance
3090    the user should preallocate the matrix storage by setting the parameters
3091    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3092    performance can be increased by more than a factor of 50.
3093 
3094    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3095    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3096    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3097 
3098    If the *_nnz parameter is given then the *_nz parameter is ignored
3099 
   A nonzero block is any block that has 1 or more nonzeros in it
3101 
3102    The user MUST specify either the local or global matrix dimensions
3103    (possibly both).
3104 
3105    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3107 
3108    Storage Information:
3109    For a square global matrix we define each processor's diagonal portion
3110    to be its local rows and the corresponding columns (a square submatrix);
3111    each processor's off-diagonal portion encompasses the remainder of the
3112    local matrix (a rectangular submatrix).
3113 
3114    The user can specify preallocated storage for the diagonal part of
3115    the local submatrix with either d_nz or d_nnz (not both).  Set
3116    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3117    memory allocation.  Likewise, specify preallocated storage for the
3118    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3119 
3120    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3121    the figure below we depict these three local rows and all columns (0-11).
3122 
3123 .vb
3124            0 1 2 3 4 5 6 7 8 9 10 11
3125           --------------------------
3126    row 3  |o o o d d d o o o o  o  o
3127    row 4  |o o o d d d o o o o  o  o
3128    row 5  |o o o d d d o o o o  o  o
3129           --------------------------
3130 .ve
3131 
3132    Thus, any entries in the d locations are stored in the d (diagonal)
3133    submatrix, and any entries in the o locations are stored in the
3134    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3135    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3136 
3137    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3138    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3139    In general, for PDE problems in which most nonzeros are near the diagonal,
3140    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3141    or you will get TERRIBLE performance; see the users' manual chapter on
3142    matrices.
3143 
3144 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3145 @*/
3146 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3147 {
3148   PetscMPIInt size;
3149 
3150   PetscFunctionBegin;
3151   PetscCall(MatCreate(comm, A));
3152   PetscCall(MatSetSizes(*A, m, n, M, N));
3153   PetscCallMPI(MPI_Comm_size(comm, &size));
3154   if (size > 1) {
3155     PetscCall(MatSetType(*A, MATMPIBAIJ));
3156     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3157   } else {
3158     PetscCall(MatSetType(*A, MATSEQBAIJ));
3159     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3160   }
3161   PetscFunctionReturn(PETSC_SUCCESS);
3162 }
3163 
/* Duplicates an MPIBAIJ matrix: creates a new matrix of the same type and sizes, copies the
   implementation-level metadata field by field, duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks, and copies or resets auxiliary structures (colmap, garray, scatter, stash). */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* The duplicate is marked assembled/preallocated up front; its pieces are filled in below */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy block-size bookkeeping (bs2 = bs*bs; mbs/nbs/Mbs/Nbs are sizes in blocks) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* Share the row/column layouts with the original by reference */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Copy scalar state; per-call scratch (MatGetRow buffers, barray) starts out empty */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but not the table itself */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* colmap maps global block columns to local off-diagonal storage; copy whichever
     representation (hash map or dense array) was compiled in */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray lists the global block columns present in the off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* cpvalues controls whether the numerical values of A and B are copied */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3239 
3240 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Loads a matrix from a PETSc binary viewer: reads the header and CSR data collectively,
   counts diagonal/off-diagonal block nonzeros to preallocate, then inserts the values row by row. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices keep rmap and cmap consistent) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local row count in blocks */
  nbs = n / bs; /* local column count in blocks */

  /* read in row lengths and build row indices (prefix sum turns lengths into offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* For each block row, count the distinct diagonal-part and off-diagonal-part block
       columns touched by any of its bs scalar rows; for SBAIJ only the upper triangle counts */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal part: first hit of this block column bumps d_nnz */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* this routine serves both types; presumably only the call matching the matrix type
       takes effect since both preallocation routines dispatch via try-methods — see
       MatMPIBAIJSetPreallocation above */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3341 
3342 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3343 {
3344   PetscBool isbinary;
3345 
3346   PetscFunctionBegin;
3347   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3348   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3349   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3350   PetscFunctionReturn(PETSC_SUCCESS);
3351 }
3352 
3353 /*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3355 
3356    Input Parameters:
3357 +  mat  - the matrix
3358 -  fact - factor
3359 
3360    Options Database Key:
3361 .  -mat_use_hash_table <fact> - provide the factor
3362 
3363    Level: advanced
3364 
3365 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3366 @*/
3367 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3368 {
3369   PetscFunctionBegin;
3370   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3371   PetscFunctionReturn(PETSC_SUCCESS);
3372 }
3373 
3374 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3375 {
3376   Mat_MPIBAIJ *baij;
3377 
3378   PetscFunctionBegin;
3379   baij          = (Mat_MPIBAIJ *)mat->data;
3380   baij->ht_fact = fact;
3381   PetscFunctionReturn(PETSC_SUCCESS);
3382 }
3383 
3384 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3385 {
3386   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3387   PetscBool    flg;
3388 
3389   PetscFunctionBegin;
3390   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3391   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3392   if (Ad) *Ad = a->A;
3393   if (Ao) *Ao = a->B;
3394   if (colmap) *colmap = a->garray;
3395   PetscFunctionReturn(PETSC_SUCCESS);
3396 }
3397 
3398 /*
3399     Special version for direct calls from Fortran (to eliminate two function call overheads
3400 */
3401 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3402   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3403 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3404   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3405 #endif
3406 
3407 /*@C
3408   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3409 
3410   Collective
3411 
3412   Input Parameters:
3413 + mat - the matrix
3414 . min - number of input rows
3415 . im - input rows
3416 . nin - number of input columns
3417 . in - input columns
3418 . v - numerical values input
3419 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3420 
3421   Developer Note:
3422     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3423 
3424   Level: advanced
3425 
3426 .seealso: `Mat`, `MatSetValuesBlocked()`
3427 @*/
/* Fortran-callable entry point that duplicates the logic of MatSetValuesBlocked() for MPIBAIJ
   matrices to avoid two levels of function-call overhead; all arguments arrive by reference. */
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* scratch buffer for one bs x bs block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the per-matrix block scratch buffer */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval is the stride (in scalars) between rows/columns of a block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip (standard PETSc convention) */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally-owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          /* block column in the off-diagonal portion: translate global to local via colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores 1-based scalar offsets; convert to block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not present in the current off-diagonal structure: disassemble and retry */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3535 
3536 /*@
3537      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3538          CSR format the local rows.
3539 
3540    Collective
3541 
3542    Input Parameters:
3543 +  comm - MPI communicator
3544 .  bs - the block size, only a block size of 1 is supported
3545 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3546 .  n - This value should be the same as the local size used in creating the
3547        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3548        calculated if N is given) For square matrices n is almost always m.
3549 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3550 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3551 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3552 .   j - column indices
3553 -   a - matrix values
3554 
3555    Output Parameter:
3556 .   mat - the matrix
3557 
3558    Level: intermediate
3559 
3560    Notes:
3561        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3562      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3563      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3564 
3565      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3566      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3567      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3568      with column-major ordering within blocks.
3569 
3570        The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3571 
3572 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3573           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3574 @*/
3575 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3576 {
3577   PetscFunctionBegin;
3578   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3579   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3580   PetscCall(MatCreate(comm, mat));
3581   PetscCall(MatSetSizes(*mat, m, n, M, N));
3582   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3583   PetscCall(MatSetBlockSize(*mat, bs));
3584   PetscCall(MatSetUp(*mat));
3585   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3586   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3587   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3588   PetscFunctionReturn(PETSC_SUCCESS);
3589 }
3590 
3591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3592 {
3593   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3594   PetscInt    *indx;
3595   PetscScalar *values;
3596 
3597   PetscFunctionBegin;
3598   PetscCall(MatGetSize(inmat, &m, &N));
3599   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3600     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3601     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3602     PetscInt    *bindx, rmax = a->rmax, j;
3603     PetscMPIInt  rank, size;
3604 
3605     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3606     mbs = m / bs;
3607     Nbs = N / cbs;
3608     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3609     nbs = n / cbs;
3610 
3611     PetscCall(PetscMalloc1(rmax, &bindx));
3612     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3613 
3614     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3615     PetscCallMPI(MPI_Comm_rank(comm, &size));
3616     if (rank == size - 1) {
3617       /* Check sum(nbs) = Nbs */
3618       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3619     }
3620 
3621     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3622     for (i = 0; i < mbs; i++) {
3623       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3624       nnz = nnz / bs;
3625       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3626       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3627       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3628     }
3629     PetscCall(PetscFree(bindx));
3630 
3631     PetscCall(MatCreate(comm, outmat));
3632     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3633     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3634     PetscCall(MatSetType(*outmat, MATBAIJ));
3635     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3636     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3637     MatPreallocateEnd(dnz, onz);
3638     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3639   }
3640 
3641   /* numeric phase */
3642   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3643   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3644 
3645   for (i = 0; i < m; i++) {
3646     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3647     Ii = i + rstart;
3648     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3649     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3650   }
3651   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3652   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3653   PetscFunctionReturn(PETSC_SUCCESS);
3654 }
3655