xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12 #if defined(PETSC_USE_LOG)
13   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
14 #endif
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(MatStashDestroy_Private(&mat->bstash));
17   PetscCall(MatDestroy(&baij->A));
18   PetscCall(MatDestroy(&baij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&baij->colmap));
21 #else
22   PetscCall(PetscFree(baij->colmap));
23 #endif
24   PetscCall(PetscFree(baij->garray));
25   PetscCall(VecDestroy(&baij->lvec));
26   PetscCall(VecScatterDestroy(&baij->Mvctx));
27   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
28   PetscCall(PetscFree(baij->barray));
29   PetscCall(PetscFree2(baij->hd, baij->ht));
30   PetscCall(PetscFree(baij->rangebs));
31   PetscCall(PetscFree(mat->data));
32 
33   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
43 #if defined(PETSC_HAVE_HYPRE)
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
45 #endif
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
51 #define TYPE BAIJ
52 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
53 #undef TYPE
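/*
   Note on the include-with-macro pattern above: mpihashmat.h specializes its
   shared implementation through the TYPE macro, so a single source file
   generates the hash-based MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
   variants named in the comment for each matrix type that includes it.
*/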
54 
55 #if defined(PETSC_HAVE_HYPRE)
56 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
57 #endif
58 
59 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
60 {
61   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
62   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
63   PetscScalar       *va, *vv;
64   Vec                vB, vA;
65   const PetscScalar *vb;
66 
67   PetscFunctionBegin;
68   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
69   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
70 
71   PetscCall(VecGetArrayWrite(vA, &va));
72   if (idx) {
73     for (i = 0; i < m; i++) {
74       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
75     }
76   }
77 
78   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
79   PetscCall(PetscMalloc1(m, &idxb));
80   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
81 
82   PetscCall(VecGetArrayWrite(v, &vv));
83   PetscCall(VecGetArrayRead(vB, &vb));
84   for (i = 0; i < m; i++) {
85     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
86       vv[i] = vb[i];
87       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
88     } else {
89       vv[i] = va[i];
90       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
91     }
92   }
93   PetscCall(VecRestoreArrayWrite(v, &vv));
94   PetscCall(VecRestoreArrayWrite(vA, &va));
95   PetscCall(VecRestoreArrayRead(vB, &vb));
96   PetscCall(PetscFree(idxb));
97   PetscCall(VecDestroy(&vA));
98   PetscCall(VecDestroy(&vB));
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
103 {
104   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
105 
106   PetscFunctionBegin;
107   PetscCall(MatStoreValues(aij->A));
108   PetscCall(MatStoreValues(aij->B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
113 {
114   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatRetrieveValues(aij->A));
118   PetscCall(MatRetrieveValues(aij->B));
119   PetscFunctionReturn(PETSC_SUCCESS);
120 }
121 
122 /*
123      Local utility routine that creates a mapping from the global column
124    number to the local number in the off-diagonal part of the local
125    storage of the matrix.  This is done in a non-scalable way since the
126    length of colmap equals the global number of block columns.
127 */
128 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
129 {
130   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
131   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
132   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
133 
134   PetscFunctionBegin;
135 #if defined(PETSC_USE_CTABLE)
136   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
137   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
138 #else
139   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
140   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
141 #endif
142   PetscFunctionReturn(PETSC_SUCCESS);
143 }
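/*
   Usage sketch (illustrative only, mirroring MatSetValues_MPIBAIJ() below): the
   +1 offset stored above lets 0, the value returned for missing keys by
   PetscHMapIGetWithDefault() and found in the untouched slots of the
   PetscCalloc1()'d array, mean "this global block column is not present in the
   off-diagonal part". Here gcol is a hypothetical global (scalar) column index:

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(baij->colmap, gcol / bs + 1, 0, &col));
     col = col - 1;
   #else
     col = baij->colmap[gcol / bs] - 1;
   #endif
     if (col < 0)  the block column is absent from baij->B
     else          col + gcol % bs is the local column within baij->B
*/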
144 
145 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
146   { \
147     brow = row / bs; \
148     rp   = aj + ai[brow]; \
149     ap   = aa + bs2 * ai[brow]; \
150     rmax = aimax[brow]; \
151     nrow = ailen[brow]; \
152     bcol = col / bs; \
153     ridx = row % bs; \
154     cidx = col % bs; \
155     low  = 0; \
156     high = nrow; \
157     while (high - low > 3) { \
158       t = (low + high) / 2; \
159       if (rp[t] > bcol) high = t; \
160       else low = t; \
161     } \
162     for (_i = low; _i < high; _i++) { \
163       if (rp[_i] > bcol) break; \
164       if (rp[_i] == bcol) { \
165         bap = ap + bs2 * _i + bs * cidx + ridx; \
166         if (addv == ADD_VALUES) *bap += value; \
167         else *bap = value; \
168         goto a_noinsert; \
169       } \
170     } \
171     if (a->nonew == 1) goto a_noinsert; \
172     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
173     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
174     N = nrow++ - 1; \
175     /* shift up all the later entries in this row */ \
176     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
177     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
178     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
179     rp[_i]                          = bcol; \
180     ap[bs2 * _i + bs * cidx + ridx] = value; \
181   a_noinsert:; \
182     ailen[brow] = nrow; \
183   }
184 
185 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
186   { \
187     brow = row / bs; \
188     rp   = bj + bi[brow]; \
189     ap   = ba + bs2 * bi[brow]; \
190     rmax = bimax[brow]; \
191     nrow = bilen[brow]; \
192     bcol = col / bs; \
193     ridx = row % bs; \
194     cidx = col % bs; \
195     low  = 0; \
196     high = nrow; \
197     while (high - low > 3) { \
198       t = (low + high) / 2; \
199       if (rp[t] > bcol) high = t; \
200       else low = t; \
201     } \
202     for (_i = low; _i < high; _i++) { \
203       if (rp[_i] > bcol) break; \
204       if (rp[_i] == bcol) { \
205         bap = ap + bs2 * _i + bs * cidx + ridx; \
206         if (addv == ADD_VALUES) *bap += value; \
207         else *bap = value; \
208         goto b_noinsert; \
209       } \
210     } \
211     if (b->nonew == 1) goto b_noinsert; \
212     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
213     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
214     N = nrow++ - 1; \
215     /* shift up all the later entries in this row */ \
216     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
217     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
218     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
219     rp[_i]                          = bcol; \
220     ap[bs2 * _i + bs * cidx + ridx] = value; \
221   b_noinsert:; \
222     bilen[brow] = nrow; \
223   }
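/*
   Note on the search strategy shared by the two macros above: the block column
   indices rp[0..nrow) of a block row are kept sorted, so the binary search
   first narrows the window to at most three candidates, and the short linear
   scan then either finds bcol or leaves _i at the insertion point for the new
   block.
*/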
224 
225 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
226 {
227   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
228   MatScalar    value;
229   PetscBool    roworiented = baij->roworiented;
230   PetscInt     i, j, row, col;
231   PetscInt     rstart_orig = mat->rmap->rstart;
232   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
233   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
234 
235   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
236   Mat          A     = baij->A;
237   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
238   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
239   MatScalar   *aa = a->a;
240 
241   Mat          B     = baij->B;
242   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
243   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
244   MatScalar   *ba = b->a;
245 
246   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
247   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
248   MatScalar *ap, *bap;
249 
250   PetscFunctionBegin;
251   for (i = 0; i < m; i++) {
252     if (im[i] < 0) continue;
253     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
254     if (im[i] >= rstart_orig && im[i] < rend_orig) {
255       row = im[i] - rstart_orig;
256       for (j = 0; j < n; j++) {
257         if (in[j] >= cstart_orig && in[j] < cend_orig) {
258           col = in[j] - cstart_orig;
259           if (roworiented) value = v[i * n + j];
260           else value = v[i + j * m];
261           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
262         } else if (in[j] < 0) {
263           continue;
264         } else {
265           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
266           if (mat->was_assembled) {
267             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
268 #if defined(PETSC_USE_CTABLE)
269             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
270             col = col - 1;
271 #else
272             col = baij->colmap[in[j] / bs] - 1;
273 #endif
274             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
275               PetscCall(MatDisAssemble_MPIBAIJ(mat));
276               col = in[j];
277               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
278               B     = baij->B;
279               b     = (Mat_SeqBAIJ *)(B)->data;
280               bimax = b->imax;
281               bi    = b->i;
282               bilen = b->ilen;
283               bj    = b->j;
284               ba    = b->a;
285             } else {
286               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
287               col += in[j] % bs;
288             }
289           } else col = in[j];
290           if (roworiented) value = v[i * n + j];
291           else value = v[i + j * m];
292           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
293           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
294         }
295       }
296     } else {
297       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
298       if (!baij->donotstash) {
299         mat->assembled = PETSC_FALSE;
300         if (roworiented) {
301           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
302         } else {
303           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
304         }
305       }
306     }
307   }
308   PetscFunctionReturn(PETSC_SUCCESS);
309 }
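/*
   Typical user-level flow exercising the paths above (usage sketch; M, N, bs,
   d_nz, o_nz, i, j and v are placeholders):

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIBAIJ));
     PetscCall(MatMPIBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL));
     PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

   Rows owned by other processes are accumulated in mat->stash and communicated
   during assembly, unless MAT_NO_OFF_PROC_ENTRIES or MAT_IGNORE_OFF_PROC_ENTRIES
   has been set.
*/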
310 
311 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
312 {
313   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
314   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
315   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
316   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
317   PetscBool          roworiented = a->roworiented;
318   const PetscScalar *value       = v;
319   MatScalar         *ap, *aa = a->a, *bap;
320 
321   PetscFunctionBegin;
322   rp    = aj + ai[row];
323   ap    = aa + bs2 * ai[row];
324   rmax  = imax[row];
325   nrow  = ailen[row];
326   value = v;
327   low   = 0;
328   high  = nrow;
329   while (high - low > 7) {
330     t = (low + high) / 2;
331     if (rp[t] > col) high = t;
332     else low = t;
333   }
334   for (i = low; i < high; i++) {
335     if (rp[i] > col) break;
336     if (rp[i] == col) {
337       bap = ap + bs2 * i;
338       if (roworiented) {
339         if (is == ADD_VALUES) {
340           for (ii = 0; ii < bs; ii++) {
341             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
342           }
343         } else {
344           for (ii = 0; ii < bs; ii++) {
345             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
346           }
347         }
348       } else {
349         if (is == ADD_VALUES) {
350           for (ii = 0; ii < bs; ii++, value += bs) {
351             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
352             bap += bs;
353           }
354         } else {
355           for (ii = 0; ii < bs; ii++, value += bs) {
356             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
357             bap += bs;
358           }
359         }
360       }
361       goto noinsert2;
362     }
363   }
364   if (nonew == 1) goto noinsert2;
365   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
366   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
367   N = nrow++ - 1;
368   high++;
369   /* shift up all the later entries in this row */
370   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
371   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
372   rp[i] = col;
373   bap   = ap + bs2 * i;
374   if (roworiented) {
375     for (ii = 0; ii < bs; ii++) {
376       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
377     }
378   } else {
379     for (ii = 0; ii < bs; ii++) {
380       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
381     }
382   }
383 noinsert2:;
384   ailen[row] = nrow;
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 /*
389     This routine should be optimized so that the block copy at "Here a copy is required" below is not needed,
390     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
391 */
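/*
   Layout sketch (illustrative only): with roworiented == PETSC_TRUE, v holds an
   (m*bs) x (n*bs) row-major array, so block (i,j) starts at
   v + (i*(stepval+bs) + j)*bs with stepval = (n-1)*bs, and consecutive rows of
   a block are stepval+bs = n*bs scalars apart. For example, with bs = 2 and
   n = 2, block (0,0) occupies v[0],v[1] (first row) and v[4],v[5] (second row),
   while block (0,1) occupies v[2],v[3] and v[6],v[7].
*/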
392 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
393 {
394   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
395   const PetscScalar *value;
396   MatScalar         *barray      = baij->barray;
397   PetscBool          roworiented = baij->roworiented;
398   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
399   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
400   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
401 
402   PetscFunctionBegin;
403   if (!barray) {
404     PetscCall(PetscMalloc1(bs2, &barray));
405     baij->barray = barray;
406   }
407 
408   if (roworiented) stepval = (n - 1) * bs;
409   else stepval = (m - 1) * bs;
410 
411   for (i = 0; i < m; i++) {
412     if (im[i] < 0) continue;
413     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
414     if (im[i] >= rstart && im[i] < rend) {
415       row = im[i] - rstart;
416       for (j = 0; j < n; j++) {
417         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, v can be used directly without a copy */
418         if ((roworiented) && (n == 1)) {
419           barray = (MatScalar *)v + i * bs2;
420         } else if ((!roworiented) && (m == 1)) {
421           barray = (MatScalar *)v + j * bs2;
422         } else { /* Here a copy is required */
423           if (roworiented) {
424             value = v + (i * (stepval + bs) + j) * bs;
425           } else {
426             value = v + (j * (stepval + bs) + i) * bs;
427           }
428           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
429             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
430             barray += bs;
431           }
432           barray -= bs2;
433         }
434 
435         if (in[j] >= cstart && in[j] < cend) {
436           col = in[j] - cstart;
437           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
438         } else if (in[j] < 0) {
439           continue;
440         } else {
441           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
442           if (mat->was_assembled) {
443             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
444 
445 #if defined(PETSC_USE_DEBUG)
446   #if defined(PETSC_USE_CTABLE)
447             {
448               PetscInt data;
449               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
450               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
451             }
452   #else
453             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
454   #endif
455 #endif
456 #if defined(PETSC_USE_CTABLE)
457             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
458             col = (col - 1) / bs;
459 #else
460             col = (baij->colmap[in[j]] - 1) / bs;
461 #endif
462             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
463               PetscCall(MatDisAssemble_MPIBAIJ(mat));
464               col = in[j];
465             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
466           } else col = in[j];
467           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
468         }
469       }
470     } else {
471       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
472       if (!baij->donotstash) {
473         if (roworiented) {
474           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         } else {
476           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
477         }
478       }
479     }
480   }
481   PetscFunctionReturn(PETSC_SUCCESS);
482 }
483 
484 #define HASH_KEY             0.6180339887
485 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
486 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
487 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
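/*
   A worked example of the multiplicative hash above (illustrative only):
   HASH_KEY is the fractional part of the golden ratio, a classic choice that
   spreads consecutive keys roughly uniformly (Knuth's multiplicative method).
   With size = 11 and key = 42:

     tmp = 42 * 0.6180339887 = 25.957...
     h1  = (PetscInt)(11 * (25.957... - 25)) = (PetscInt)10.53... = 10
*/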
488 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
489 {
490   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
491   PetscBool    roworiented = baij->roworiented;
492   PetscInt     i, j, row, col;
493   PetscInt     rstart_orig = mat->rmap->rstart;
494   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
495   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
496   PetscReal    tmp;
497   MatScalar  **HD       = baij->hd, value;
498   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
499 
500   PetscFunctionBegin;
501   for (i = 0; i < m; i++) {
502     if (PetscDefined(USE_DEBUG)) {
503       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
504       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
505     }
506     row = im[i];
507     if (row >= rstart_orig && row < rend_orig) {
508       for (j = 0; j < n; j++) {
509         col = in[j];
510         if (roworiented) value = v[i * n + j];
511         else value = v[i + j * m];
512         /* Look up into the Hash Table */
513         key = (row / bs) * Nbs + (col / bs) + 1;
514         h1  = HASH(size, key, tmp);
515 
516         idx = h1;
517         if (PetscDefined(USE_DEBUG)) {
518           insert_ct++;
519           total_ct++;
520           if (HT[idx] != key) {
521             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
522               ;
523             if (idx == size) {
524               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
525                 ;
526               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
527             }
528           }
529         } else if (HT[idx] != key) {
530           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
531             ;
532           if (idx == size) {
533             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
534               ;
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
595               ;
596             if (idx == size) {
597               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
598                 ;
599               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
600             }
601           }
602         } else if (HT[idx] != key) {
603           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
604             ;
605           if (idx == size) {
606             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
607               ;
608             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
609           }
610         }
611         baij_a = HD[idx];
612         if (roworiented) {
613           /*value = v + i*(stepval+bs)*bs + j*bs;*/
614           /* value = v + (i*(stepval+bs)+j)*bs; */
615           value = v_t;
616           v_t += bs;
617           if (addv == ADD_VALUES) {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
620             }
621           } else {
622             for (ii = 0; ii < bs; ii++, value += stepval) {
623               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
624             }
625           }
626         } else {
627           value = v + j * (stepval + bs) * bs + i * bs;
628           if (addv == ADD_VALUES) {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
631             }
632           } else {
633             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
634               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
635             }
636           }
637         }
638       }
639     } else {
640       if (!baij->donotstash) {
641         if (roworiented) {
642           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         } else {
644           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
645         }
646       }
647     }
648   }
649   if (PetscDefined(USE_DEBUG)) {
650     baij->ht_total_ct += total_ct;
651     baij->ht_insert_ct += insert_ct;
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
657 {
658   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
659   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
660   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
661 
662   PetscFunctionBegin;
663   for (i = 0; i < m; i++) {
664     if (idxm[i] < 0) continue; /* negative row */
665     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
666     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
667       row = idxm[i] - bsrstart;
668       for (j = 0; j < n; j++) {
669         if (idxn[j] < 0) continue; /* negative column */
670         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
671         if (idxn[j] >= bscstart && idxn[j] < bscend) {
672           col = idxn[j] - bscstart;
673           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
674         } else {
675           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
676 #if defined(PETSC_USE_CTABLE)
677           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
678           data--;
679 #else
680           data = baij->colmap[idxn[j] / bs] - 1;
681 #endif
682           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
683           else {
684             col = data + idxn[j] % bs;
685             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
686           }
687         }
688       }
689     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
690   }
691   PetscFunctionReturn(PETSC_SUCCESS);
692 }
693 
694 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
695 {
696   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
697   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
698   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
699   PetscReal    sum = 0.0;
700   MatScalar   *v;
701 
702   PetscFunctionBegin;
703   if (baij->size == 1) {
704     PetscCall(MatNorm(baij->A, type, nrm));
705   } else {
706     if (type == NORM_FROBENIUS) {
707       v  = amat->a;
708       nz = amat->nz * bs2;
709       for (i = 0; i < nz; i++) {
710         sum += PetscRealPart(PetscConj(*v) * (*v));
711         v++;
712       }
713       v  = bmat->a;
714       nz = bmat->nz * bs2;
715       for (i = 0; i < nz; i++) {
716         sum += PetscRealPart(PetscConj(*v) * (*v));
717         v++;
718       }
719       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
720       *nrm = PetscSqrtReal(*nrm);
721     } else if (type == NORM_1) { /* max column sum */
722       PetscReal *tmp, *tmp2;
723       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
724       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
725       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
726       v  = amat->a;
727       jj = amat->j;
728       for (i = 0; i < amat->nz; i++) {
729         for (j = 0; j < bs; j++) {
730           col = bs * (cstart + *jj) + j; /* column index */
731           for (row = 0; row < bs; row++) {
732             tmp[col] += PetscAbsScalar(*v);
733             v++;
734           }
735         }
736         jj++;
737       }
738       v  = bmat->a;
739       jj = bmat->j;
740       for (i = 0; i < bmat->nz; i++) {
741         for (j = 0; j < bs; j++) {
742           col = bs * garray[*jj] + j;
743           for (row = 0; row < bs; row++) {
744             tmp[col] += PetscAbsScalar(*v);
745             v++;
746           }
747         }
748         jj++;
749       }
750       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
751       *nrm = 0.0;
752       for (j = 0; j < mat->cmap->N; j++) {
753         if (tmp2[j] > *nrm) *nrm = tmp2[j];
754       }
755       PetscCall(PetscFree(tmp));
756       PetscCall(PetscFree(tmp2));
757     } else if (type == NORM_INFINITY) { /* max row sum */
758       PetscReal *sums;
759       PetscCall(PetscMalloc1(bs, &sums));
760       sum = 0.0;
761       for (j = 0; j < amat->mbs; j++) {
762         for (row = 0; row < bs; row++) sums[row] = 0.0;
763         v  = amat->a + bs2 * amat->i[j];
764         nz = amat->i[j + 1] - amat->i[j];
765         for (i = 0; i < nz; i++) {
766           for (col = 0; col < bs; col++) {
767             for (row = 0; row < bs; row++) {
768               sums[row] += PetscAbsScalar(*v);
769               v++;
770             }
771           }
772         }
773         v  = bmat->a + bs2 * bmat->i[j];
774         nz = bmat->i[j + 1] - bmat->i[j];
775         for (i = 0; i < nz; i++) {
776           for (col = 0; col < bs; col++) {
777             for (row = 0; row < bs; row++) {
778               sums[row] += PetscAbsScalar(*v);
779               v++;
780             }
781           }
782         }
783         for (row = 0; row < bs; row++) {
784           if (sums[row] > sum) sum = sums[row];
785         }
786       }
787       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
788       PetscCall(PetscFree(sums));
789     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
790   }
791   PetscFunctionReturn(PETSC_SUCCESS);
792 }
793 
794 /*
795   Creates and populates the hash table.
796   This table is created only once.
797   If new entries need to be added to the matrix
798   then the hash table has to be destroyed and
799   recreated.
800 */
801 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
802 {
803   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
804   Mat          A = baij->A, B = baij->B;
805   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
806   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
807   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
808   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
809   PetscInt    *HT, key;
810   MatScalar  **HD;
811   PetscReal    tmp;
812 #if defined(PETSC_USE_INFO)
813   PetscInt ct = 0, max = 0;
814 #endif
815 
816   PetscFunctionBegin;
817   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
818 
819   baij->ht_size = (PetscInt)(factor * nz);
820   ht_size       = baij->ht_size;
821 
822   /* Allocate Memory for Hash Table */
823   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
824   HD = baij->hd;
825   HT = baij->ht;
826 
827   /* Loop Over A */
828   for (i = 0; i < a->mbs; i++) {
829     for (j = ai[i]; j < ai[i + 1]; j++) {
830       row = i + rstart;
831       col = aj[j] + cstart;
832 
833       key = row * Nbs + col + 1;
834       h1  = HASH(ht_size, key, tmp);
835       for (k = 0; k < ht_size; k++) {
836         if (!HT[(h1 + k) % ht_size]) {
837           HT[(h1 + k) % ht_size] = key;
838           HD[(h1 + k) % ht_size] = a->a + j * bs2;
839           break;
840 #if defined(PETSC_USE_INFO)
841         } else {
842           ct++;
843 #endif
844         }
845       }
846 #if defined(PETSC_USE_INFO)
847       if (k > max) max = k;
848 #endif
849     }
850   }
851   /* Loop Over B */
852   for (i = 0; i < b->mbs; i++) {
853     for (j = bi[i]; j < bi[i + 1]; j++) {
854       row = i + rstart;
855       col = garray[bj[j]];
856       key = row * Nbs + col + 1;
857       h1  = HASH(ht_size, key, tmp);
858       for (k = 0; k < ht_size; k++) {
859         if (!HT[(h1 + k) % ht_size]) {
860           HT[(h1 + k) % ht_size] = key;
861           HD[(h1 + k) % ht_size] = b->a + j * bs2;
862           break;
863 #if defined(PETSC_USE_INFO)
864         } else {
865           ct++;
866 #endif
867         }
868       }
869 #if defined(PETSC_USE_INFO)
870       if (k > max) max = k;
871 #endif
872     }
873   }
874 
875   /* Print Summary */
876 #if defined(PETSC_USE_INFO)
877   for (i = 0, j = 0; i < ht_size; i++) {
878     if (HT[i]) j++;
879   }
880   PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
881 #endif
882   PetscFunctionReturn(PETSC_SUCCESS);
883 }
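/*
   Lookup sketch (illustrative, mirroring MatSetValues_MPIBAIJ_HT() above):
   collisions are resolved by linear probing, so a search starts at the hashed
   slot and wraps around the table once before concluding the key is absent:

     key = row * Nbs + col + 1;
     h1  = HASH(size, key, tmp);
     for (idx = h1; idx < size && HT[idx] != key; idx++) { }
     if (idx == size)
       for (idx = 0; idx < h1 && HT[idx] != key; idx++) { }
     if HT[idx] == key, the block's values live at HD[idx]
*/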
884 
885 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
886 {
887   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
888   PetscInt     nstash, reallocs;
889 
890   PetscFunctionBegin;
891   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
892 
893   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
894   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
895   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
898   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
899   PetscFunctionReturn(PETSC_SUCCESS);
900 }
901 
902 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
903 {
904   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
905   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
906   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
907   PetscInt    *row, *col;
908   PetscBool    r1, r2, r3, other_disassembled;
909   MatScalar   *val;
910   PetscMPIInt  n;
911 
912   PetscFunctionBegin;
913   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
914   if (!baij->donotstash && !mat->nooffprocentries) {
915     while (1) {
916       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
917       if (!flg) break;
918 
919       for (i = 0; i < n;) {
920         /* Now identify the consecutive vals belonging to the same row */
921         for (j = i, rstart = row[j]; j < n; j++) {
922           if (row[j] != rstart) break;
923         }
924         if (j < n) ncols = j - i;
925         else ncols = n - i;
926         /* Now assemble all these values with a single function call */
927         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
928         i = j;
929       }
930     }
931     PetscCall(MatStashScatterEnd_Private(&mat->stash));
932     /* Now process the block-stash. Since the values are stashed column-oriented,
933        set the roworiented flag to column oriented, and after MatSetValues()
934        restore the original flags */
935     r1 = baij->roworiented;
936     r2 = a->roworiented;
937     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
938 
939     baij->roworiented = PETSC_FALSE;
940     a->roworiented    = PETSC_FALSE;
941 
942     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
943     while (1) {
944       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
945       if (!flg) break;
946 
947       for (i = 0; i < n;) {
948         /* Now identify the consecutive vals belonging to the same row */
949         for (j = i, rstart = row[j]; j < n; j++) {
950           if (row[j] != rstart) break;
951         }
952         if (j < n) ncols = j - i;
953         else ncols = n - i;
954         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
955         i = j;
956       }
957     }
958     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
959 
960     baij->roworiented = r1;
961     a->roworiented    = r2;
962 
963     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
964   }
965 
966   PetscCall(MatAssemblyBegin(baij->A, mode));
967   PetscCall(MatAssemblyEnd(baij->A, mode));
968 
969   /* determine if any processor has disassembled, if so we must
970      also disassemble ourselves, in order that we may reassemble. */
971   /*
972      if the nonzero structure of submatrix B cannot change then we know that
973      no processor disassembled, and we can skip this check
974   */
975   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
976     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
977     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
978   }
979 
980   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
981   PetscCall(MatAssemblyBegin(baij->B, mode));
982   PetscCall(MatAssemblyEnd(baij->B, mode));
983 
984 #if defined(PETSC_USE_INFO)
985   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
986     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
987 
988     baij->ht_total_ct  = 0;
989     baij->ht_insert_ct = 0;
990   }
991 #endif
992   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
993     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
994 
995     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
996     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
997   }
998 
999   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
1000 
1001   baij->rowvalues = NULL;
1002 
1003   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1004   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
1005     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1006     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1007   }
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
1011 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1012 #include <petscdraw.h>
1013 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1014 {
1015   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1016   PetscMPIInt       rank = baij->rank;
1017   PetscInt          bs   = mat->rmap->bs;
1018   PetscBool         iascii, isdraw;
1019   PetscViewer       sviewer;
1020   PetscViewerFormat format;
1021 
1022   PetscFunctionBegin;
1023   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1024   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1025   if (iascii) {
1026     PetscCall(PetscViewerGetFormat(viewer, &format));
1027     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1028       MatInfo info;
1029       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1030       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1031       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1032       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1033                                                    mat->rmap->bs, (double)info.memory));
1034       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1035       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1036       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1037       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1038       PetscCall(PetscViewerFlush(viewer));
1039       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1040       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1041       PetscCall(VecScatterView(baij->Mvctx, viewer));
1042       PetscFunctionReturn(PETSC_SUCCESS);
1043     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1044       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1045       PetscFunctionReturn(PETSC_SUCCESS);
1046     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1047       PetscFunctionReturn(PETSC_SUCCESS);
1048     }
1049   }
1050 
1051   if (isdraw) {
1052     PetscDraw draw;
1053     PetscBool isnull;
1054     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1055     PetscCall(PetscDrawIsNull(draw, &isnull));
1056     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1057   }
1058 
1059   {
1060     /* assemble the entire matrix onto first processor. */
1061     Mat          A;
1062     Mat_SeqBAIJ *Aloc;
1063     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1064     MatScalar   *a;
1065     const char  *matname;
1066 
1067     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1068     /* Perhaps this should be the type of mat? */
1069     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1070     if (rank == 0) {
1071       PetscCall(MatSetSizes(A, M, N, M, N));
1072     } else {
1073       PetscCall(MatSetSizes(A, 0, 0, M, N));
1074     }
1075     PetscCall(MatSetType(A, MATMPIBAIJ));
1076     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1077     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1078 
1079     /* copy over the A part */
1080     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1081     ai   = Aloc->i;
1082     aj   = Aloc->j;
1083     a    = Aloc->a;
1084     PetscCall(PetscMalloc1(bs, &rvals));
1085 
1086     for (i = 0; i < mbs; i++) {
1087       rvals[0] = bs * (baij->rstartbs + i);
1088       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1089       for (j = ai[i]; j < ai[i + 1]; j++) {
1090         col = (baij->cstartbs + aj[j]) * bs;
1091         for (k = 0; k < bs; k++) {
1092           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1093           col++;
1094           a += bs;
1095         }
1096       }
1097     }
1098     /* copy over the B part */
1099     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1100     ai   = Aloc->i;
1101     aj   = Aloc->j;
1102     a    = Aloc->a;
1103     for (i = 0; i < mbs; i++) {
1104       rvals[0] = bs * (baij->rstartbs + i);
1105       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1106       for (j = ai[i]; j < ai[i + 1]; j++) {
1107         col = baij->garray[aj[j]] * bs;
1108         for (k = 0; k < bs; k++) {
1109           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1110           col++;
1111           a += bs;
1112         }
1113       }
1114     }
1115     PetscCall(PetscFree(rvals));
1116     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1117     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1118     /*
1119        Everyone has to call to draw the matrix since the graphics waits are
1120        synchronized across all processors that share the PetscDraw object
1121     */
1122     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1123     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1124     if (rank == 0) {
1125       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
1126       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
1127     }
1128     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1129     PetscCall(PetscViewerFlush(viewer));
1130     PetscCall(MatDestroy(&A));
1131   }
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1136 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1137 {
1138   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1139   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1140   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1141   const PetscInt *garray = aij->garray;
1142   PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
1143   PetscInt       *rowlens, *colidxs;
1144   PetscScalar    *matvals;
1145 
1146   PetscFunctionBegin;
1147   PetscCall(PetscViewerSetUp(viewer));
1148 
1149   M  = mat->rmap->N;
1150   N  = mat->cmap->N;
1151   m  = mat->rmap->n;
1152   rs = mat->rmap->rstart;
1153   cs = mat->cmap->rstart;
1154   bs = mat->rmap->bs;
1155   nz = bs * bs * (A->nz + B->nz);
1156 
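  /*
     On-disk layout produced below (PETSc binary matrix format, blocks flattened
     to scalar entries):
       header[4]     MAT_FILE_CLASSID, M, N, global nonzero count
       rowlens[M]    nonzeros in each global row
       colidxs[nnz]  global column indices, ascending within each row
       matvals[nnz]  values in the same order as colidxs
     To keep each row's indices ascending, off-diagonal entries to the left of
     the diagonal block are written first, then the diagonal block, then the
     remaining off-diagonal entries.
  */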
1157   /* write matrix header */
1158   header[0] = MAT_FILE_CLASSID;
1159   header[1] = M;
1160   header[2] = N;
1161   header[3] = nz;
1162   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1163   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1164 
1165   /* fill in and store row lengths */
1166   PetscCall(PetscMalloc1(m, &rowlens));
1167   for (cnt = 0, i = 0; i < A->mbs; i++)
1168     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1169   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1170   PetscCall(PetscFree(rowlens));
1171 
1172   /* fill in and store column indices */
1173   PetscCall(PetscMalloc1(nz, &colidxs));
1174   for (cnt = 0, i = 0; i < A->mbs; i++) {
1175     for (k = 0; k < bs; k++) {
1176       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1177         if (garray[B->j[jb]] > cs / bs) break;
1178         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1179       }
1180       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1181         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1182       for (; jb < B->i[i + 1]; jb++)
1183         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1184     }
1185   }
1186   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1187   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1188   PetscCall(PetscFree(colidxs));
1189 
1190   /* fill in and store nonzero values */
1191   PetscCall(PetscMalloc1(nz, &matvals));
1192   for (cnt = 0, i = 0; i < A->mbs; i++) {
1193     for (k = 0; k < bs; k++) {
1194       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1195         if (garray[B->j[jb]] > cs / bs) break;
1196         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1197       }
1198       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1199         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1200       for (; jb < B->i[i + 1]; jb++)
1201         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1202     }
1203   }
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1205   PetscCall(PetscFree(matvals));
1206 
1207   /* write block size option to the viewer's .info file */
1208   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1209   PetscFunctionReturn(PETSC_SUCCESS);
1210 }
1211 
1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1213 {
1214   PetscBool iascii, isdraw, issocket, isbinary;
1215 
1216   PetscFunctionBegin;
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1220   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1221   if (iascii || isdraw || issocket) {
1222     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1223   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1228 {
1229   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1230   PetscInt     nt;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(VecGetLocalSize(xx, &nt));
1234   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1235   PetscCall(VecGetLocalSize(yy, &nt));
1236   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1237   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); /* start gathering the needed ghost values of xx */
1238   PetscCall((*a->A->ops->mult)(a->A, xx, yy));                                      /* diagonal-block product, overlapped with the communication */
1239   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1240   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));                          /* add the off-diagonal-block contribution */
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1245 {
1246   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1247 
1248   PetscFunctionBegin;
1249   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1251   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1252   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1257 {
1258   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1259 
1260   PetscFunctionBegin;
1261   /* do nondiagonal part */
1262   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1263   /* do local part */
1264   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1265   /* add partial results together */
1266   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1267   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1272 {
1273   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1274 
1275   PetscFunctionBegin;
1276   /* do nondiagonal part */
1277   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1278   /* do local part */
1279   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1280   /* add partial results together */
1281   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1282   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1283   PetscFunctionReturn(PETSC_SUCCESS);
1284 }
1285 
1286 /*
1287   This only works correctly for square matrices where the subblock A->A is the
1288    diagonal block
1289 */
1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1291 {
1292   PetscFunctionBegin;
1293   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1294   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1299 {
1300   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1301 
1302   PetscFunctionBegin;
1303   PetscCall(MatScale(a->A, aa));
1304   PetscCall(MatScale(a->B, aa));
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307 
1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1309 {
1310   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1311   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1312   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1313   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1314   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1315 
1316   PetscFunctionBegin;
1317   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1318   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1319   mat->getrowactive = PETSC_TRUE;
1320 
1321   if (!mat->rowvalues && (idx || v)) {
1322     /*
1323         allocate enough space to hold information from the longest row.
1324     */
1325     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1326     PetscInt     max = 1, mbs = mat->mbs, tmp;
1327     for (i = 0; i < mbs; i++) {
1328       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1329       if (max < tmp) max = tmp;
1330     }
1331     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1332   }
1333   lrow = row - brstart;
1334 
1335   pvA = &vworkA;
1336   pcA = &cworkA;
1337   pvB = &vworkB;
1338   pcB = &cworkB;
1339   if (!v) {
1340     pvA = NULL;
1341     pvB = NULL;
1342   }
1343   if (!idx) {
1344     pcA = NULL;
1345     if (!v) pcB = NULL;
1346   }
1347   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1348   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1349   nztot = nzA + nzB;
1350 
1351   cmap = mat->garray;
1352   if (v || idx) {
1353     if (nztot) {
1354       /* Sort by increasing column numbers, assuming A and B already sorted */
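      /*
         Hypothetical illustration (made-up numbers): with bs = 1 and the diagonal
         columns starting at cstart = 5, B could hold global columns {1,3,8} and
         A columns {5,6}. The merged row is then {1,3,5,6,8}: B's columns below
         cstart come first (imark = 2 records how many), followed by all of A's
         columns, followed by B's remaining columns.
      */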
1355       PetscInt imark = -1;
1356       if (v) {
1357         *v = v_p = mat->rowvalues;
1358         for (i = 0; i < nzB; i++) {
1359           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1360           else break;
1361         }
1362         imark = i;
1363         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1364         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1365       }
1366       if (idx) {
1367         *idx = idx_p = mat->rowindices;
1368         if (imark > -1) {
1369           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1370         } else {
1371           for (i = 0; i < nzB; i++) {
1372             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1373             else break;
1374           }
1375           imark = i;
1376         }
1377         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1378         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1379       }
1380     } else {
1381       if (idx) *idx = NULL;
1382       if (v) *v = NULL;
1383     }
1384   }
1385   *nz = nztot;
1386   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1387   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1397   baij->getrowactive = PETSC_FALSE;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1402 {
1403   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatZeroEntries(l->A));
1407   PetscCall(MatZeroEntries(l->B));
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1412 {
1413   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1414   Mat            A = a->A, B = a->B;
1415   PetscLogDouble isend[5], irecv[5];
1416 
1417   PetscFunctionBegin;
1418   info->block_size = (PetscReal)matin->rmap->bs;
1419 
1420   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1421 
1422   isend[0] = info->nz_used;
1423   isend[1] = info->nz_allocated;
1424   isend[2] = info->nz_unneeded;
1425   isend[3] = info->memory;
1426   isend[4] = info->mallocs;
1427 
1428   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1429 
1430   isend[0] += info->nz_used;
1431   isend[1] += info->nz_allocated;
1432   isend[2] += info->nz_unneeded;
1433   isend[3] += info->memory;
1434   isend[4] += info->mallocs;
1435 
1436   if (flag == MAT_LOCAL) {
1437     info->nz_used      = isend[0];
1438     info->nz_allocated = isend[1];
1439     info->nz_unneeded  = isend[2];
1440     info->memory       = isend[3];
1441     info->mallocs      = isend[4];
1442   } else if (flag == MAT_GLOBAL_MAX) {
1443     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else if (flag == MAT_GLOBAL_SUM) {
1451     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1466 {
1467   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1468 
1469   PetscFunctionBegin;
1470   switch (op) {
1471   case MAT_NEW_NONZERO_LOCATIONS:
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1474   case MAT_KEEP_NONZERO_PATTERN:
1475   case MAT_NEW_NONZERO_LOCATION_ERR:
1476     MatCheckPreallocated(A, 1);
1477     PetscCall(MatSetOption(a->A, op, flg));
1478     PetscCall(MatSetOption(a->B, op, flg));
1479     break;
1480   case MAT_ROW_ORIENTED:
1481     MatCheckPreallocated(A, 1);
1482     a->roworiented = flg;
1483 
1484     PetscCall(MatSetOption(a->A, op, flg));
1485     PetscCall(MatSetOption(a->B, op, flg));
1486     break;
1487   case MAT_FORCE_DIAGONAL_ENTRIES:
1488   case MAT_SORTED_FULL:
1489     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1490     break;
1491   case MAT_IGNORE_OFF_PROC_ENTRIES:
1492     a->donotstash = flg;
1493     break;
1494   case MAT_USE_HASH_TABLE:
1495     a->ht_flag = flg;
1496     a->ht_fact = 1.39;
1497     break;
1498   case MAT_SYMMETRIC:
1499   case MAT_STRUCTURALLY_SYMMETRIC:
1500   case MAT_HERMITIAN:
1501   case MAT_SUBMAT_SINGLEIS:
1502   case MAT_SYMMETRY_ETERNAL:
1503   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1504   case MAT_SPD_ETERNAL:
1505     /* if the diagonal block is square it inherits some of the properties above */
1506     break;
1507   default:
1508     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1509   }
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 }
1512 
1513 PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1514 {
1515   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1516   Mat_SeqBAIJ *Aloc;
1517   Mat          B;
1518   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1519   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1520   MatScalar   *a;
1521 
1522   PetscFunctionBegin;
1523   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1524   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1525     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1526     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1527     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1528     /* Do not know preallocation information, but must set block size */
1529     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1530   } else {
1531     B = *matout;
1532   }
1533 
1534   /* copy over the A part */
1535   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1536   ai   = Aloc->i;
1537   aj   = Aloc->j;
1538   a    = Aloc->a;
1539   PetscCall(PetscMalloc1(bs, &rvals));
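  /*
     For block row i, rvals holds its bs global row indices. Each bs-length strip
     of a is one column of a bs x bs block (blocks are stored column-major), so the
     MatSetValues() calls below write one column of an A block as one row of B,
     inserting the transpose entry by entry.
  */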
1540 
1541   for (i = 0; i < mbs; i++) {
1542     rvals[0] = bs * (baij->rstartbs + i);
1543     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1544     for (j = ai[i]; j < ai[i + 1]; j++) {
1545       col = (baij->cstartbs + aj[j]) * bs;
1546       for (k = 0; k < bs; k++) {
1547         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1548 
1549         col++;
1550         a += bs;
1551       }
1552     }
1553   }
1554   /* copy over the B part */
1555   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1556   ai   = Aloc->i;
1557   aj   = Aloc->j;
1558   a    = Aloc->a;
1559   for (i = 0; i < mbs; i++) {
1560     rvals[0] = bs * (baij->rstartbs + i);
1561     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1562     for (j = ai[i]; j < ai[i + 1]; j++) {
1563       col = baij->garray[aj[j]] * bs;
1564       for (k = 0; k < bs; k++) {
1565         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1566         col++;
1567         a += bs;
1568       }
1569     }
1570   }
1571   PetscCall(PetscFree(rvals));
1572   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1574 
1575   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1576   else PetscCall(MatHeaderMerge(A, &B));
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1581 {
1582   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1583   Mat          a = baij->A, b = baij->B;
1584   PetscInt     s1, s2, s3;
1585 
1586   PetscFunctionBegin;
1587   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1588   if (rr) {
1589     PetscCall(VecGetLocalSize(rr, &s1));
1590     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1591     /* Overlap communication with computation. */
1592     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1593   }
1594   if (ll) {
1595     PetscCall(VecGetLocalSize(ll, &s1));
1596     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1597     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1598   }
1599   /* scale the diagonal block */
1600   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1601 
1602   if (rr) {
1603     /* Do a scatter end and then right scale the off-diagonal block */
1604     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1605     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1606   }
1607   PetscFunctionReturn(PETSC_SUCCESS);
1608 }
1609 
1610 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1611 {
1612   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1613   PetscInt    *lrows;
1614   PetscInt     r, len;
1615   PetscBool    cong;
1616 
1617   PetscFunctionBegin;
1618   /* get locally owned rows */
1619   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1620   /* fix right hand side if needed */
1621   if (x && b) {
1622     const PetscScalar *xx;
1623     PetscScalar       *bb;
1624 
1625     PetscCall(VecGetArrayRead(x, &xx));
1626     PetscCall(VecGetArray(b, &bb));
1627     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1628     PetscCall(VecRestoreArrayRead(x, &xx));
1629     PetscCall(VecRestoreArray(b, &bb));
1630   }
1631 
1632   /* actually zap the local rows */
1633   /*
1634      Zero the required rows. If the "diagonal block" of the matrix
1635      is square and the user wishes to set the diagonal, we use separate
1636      code so that MatSetValues() is not called for each diagonal entry,
1637      which would allocate new memory on every call, triggering many
1638      mallocs and slowing things down.
1639   */
1640   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
1641   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1642   PetscCall(MatHasCongruentLayouts(A, &cong));
1643   if ((diag != 0.0) && cong) {
1644     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1645   } else if (diag != 0.0) {
1646     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1647     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1648        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1649     for (r = 0; r < len; ++r) {
1650       const PetscInt row = lrows[r] + A->rmap->rstart;
1651       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1652     }
1653     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1654     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1655   } else {
1656     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1657   }
1658   PetscCall(PetscFree(lrows));
1659 
1660   /* only change matrix nonzero state if pattern was allowed to be changed */
1661   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1662     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1663     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1664   }
1665   PetscFunctionReturn(PETSC_SUCCESS);
1666 }
1667 
1668 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1669 {
1670   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1671   PetscMPIInt        n = A->rmap->n, p = 0;
1672   PetscInt           i, j, k, r, len = 0, row, col, count;
1673   PetscInt          *lrows, *owners = A->rmap->range;
1674   PetscSFNode       *rrows;
1675   PetscSF            sf;
1676   const PetscScalar *xx;
1677   PetscScalar       *bb, *mask;
1678   Vec                xmask, lmask;
1679   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1680   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1681   PetscScalar       *aa;
1682 
1683   PetscFunctionBegin;
1684   /* Create SF where leaves are input rows and roots are owned rows */
1685   PetscCall(PetscMalloc1(n, &lrows));
1686   for (r = 0; r < n; ++r) lrows[r] = -1;
1687   PetscCall(PetscMalloc1(N, &rrows));
1688   for (r = 0; r < N; ++r) {
1689     const PetscInt idx = rows[r];
1690     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1691     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1692       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1693     }
1694     rrows[r].rank  = p;
1695     rrows[r].index = rows[r] - owners[p];
1696   }
1697   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1698   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1699   /* Collect flags for rows to be zeroed */
1700   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1701   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1702   PetscCall(PetscSFDestroy(&sf));
1703   /* Compress and put in row numbers */
1704   for (r = 0; r < n; ++r)
1705     if (lrows[r] >= 0) lrows[len++] = r;
1706   /* zero diagonal part of matrix */
1707   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1708   /* handle off diagonal part of matrix */
1709   PetscCall(MatCreateVecs(A, &xmask, NULL));
1710   PetscCall(VecDuplicate(l->lvec, &lmask));
1711   PetscCall(VecGetArray(xmask, &bb));
1712   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1713   PetscCall(VecRestoreArray(xmask, &bb));
1714   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1715   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1716   PetscCall(VecDestroy(&xmask));
1717   if (x) {
1718     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1719     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1720     PetscCall(VecGetArrayRead(l->lvec, &xx));
1721     PetscCall(VecGetArray(b, &bb));
1722   }
1723   PetscCall(VecGetArray(lmask, &mask));
1724   /* remove zeroed rows of off diagonal matrix */
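  /*
     Each bs x bs block is stored contiguously in column-major order, so entry
     (r,c) of block number k sits at a[k*bs2 + c*bs + r]; zeroing local row `row'
     therefore starts at offset (row % bs) inside each block of its block row and
     strides by bs from one block column entry to the next.
  */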
1725   for (i = 0; i < len; ++i) {
1726     row   = lrows[i];
1727     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1728     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
1729     for (k = 0; k < count; ++k) {
1730       aa[0] = 0.0;
1731       aa += bs;
1732     }
1733   }
1734   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1735   for (i = 0; i < l->B->rmap->N; ++i) {
1736     row = i / bs;
1737     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1738       for (k = 0; k < bs; ++k) {
1739         col = bs * baij->j[j] + k;
1740         if (PetscAbsScalar(mask[col])) {
1741           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
1742           if (x) bb[i] -= aa[0] * xx[col];
1743           aa[0] = 0.0;
1744         }
1745       }
1746     }
1747   }
1748   if (x) {
1749     PetscCall(VecRestoreArray(b, &bb));
1750     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1751   }
1752   PetscCall(VecRestoreArray(lmask, &mask));
1753   PetscCall(VecDestroy(&lmask));
1754   PetscCall(PetscFree(lrows));
1755 
1756   /* only change matrix nonzero state if pattern was allowed to be changed */
1757   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1758     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1759     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1760   }
1761   PetscFunctionReturn(PETSC_SUCCESS);
1762 }
1763 
1764 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatSetUnfactored(a->A));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1774 
1775 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1776 {
1777   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1778   Mat          a, b, c, d;
1779   PetscBool    flg;
1780 
1781   PetscFunctionBegin;
1782   a = matA->A;
1783   b = matA->B;
1784   c = matB->A;
1785   d = matB->B;
1786 
1787   PetscCall(MatEqual(a, c, &flg));
1788   if (flg) PetscCall(MatEqual(b, d, &flg));
1789   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1794 {
1795   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1796   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     PetscCall(MatCopy_Basic(A, B, str));
1802   } else {
1803     PetscCall(MatCopy(a->A, b->A, str));
1804     PetscCall(MatCopy(a->B, b->B, str));
1805   }
1806   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1811 {
1812   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1813   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1814   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
1821 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1822 {
1823   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1824   PetscBLASInt bnz, one                         = 1;
1825   Mat_SeqBAIJ *x, *y;
1826   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1827 
1828   PetscFunctionBegin;
1829   if (str == SAME_NONZERO_PATTERN) {
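    /* identical nonzero patterns mean x->a and y->a align entry for entry, so
       Y += a*X reduces to one BLAS axpy over nz*bs2 values for each of the
       diagonal and off-diagonal parts */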
1830     PetscScalar alpha = a;
1831     x                 = (Mat_SeqBAIJ *)xx->A->data;
1832     y                 = (Mat_SeqBAIJ *)yy->A->data;
1833     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1834     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1835     x = (Mat_SeqBAIJ *)xx->B->data;
1836     y = (Mat_SeqBAIJ *)yy->B->data;
1837     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1838     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1839     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1840   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1841     PetscCall(MatAXPY_Basic(Y, a, X, str));
1842   } else {
1843     Mat       B;
1844     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1845     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1846     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1847     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1848     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1849     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1850     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1851     PetscCall(MatSetType(B, MATMPIBAIJ));
1852     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1853     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1854     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1855     /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1! */
1856     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1857     PetscCall(MatHeaderMerge(Y, &B));
1858     PetscCall(PetscFree(nnz_d));
1859     PetscCall(PetscFree(nnz_o));
1860   }
1861   PetscFunctionReturn(PETSC_SUCCESS);
1862 }
1863 
1864 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1865 {
1866   PetscFunctionBegin;
1867   if (PetscDefined(USE_COMPLEX)) {
1868     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1869 
1870     PetscCall(MatConjugate_SeqBAIJ(a->A));
1871     PetscCall(MatConjugate_SeqBAIJ(a->B));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatRealPart(a->A));
1882   PetscCall(MatRealPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1887 {
1888   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatImaginaryPart(a->A));
1892   PetscCall(MatImaginaryPart(a->B));
1893   PetscFunctionReturn(PETSC_SUCCESS);
1894 }
1895 
1896 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1897 {
1898   IS       iscol_local;
1899   PetscInt csize;
1900 
1901   PetscFunctionBegin;
1902   PetscCall(ISGetLocalSize(iscol, &csize));
1903   if (call == MAT_REUSE_MATRIX) {
1904     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1905     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1906   } else {
1907     PetscCall(ISAllGather(iscol, &iscol_local));
1908   }
1909   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1910   if (call == MAT_INITIAL_MATRIX) {
1911     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1912     PetscCall(ISDestroy(&iscol_local));
1913   }
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 /*
1918   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
1919   and then the end result obtained by concatenating the local matrices.
1920   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1921   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1922 */
1923 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
1924 {
1925   PetscMPIInt  rank, size;
1926   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1927   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1928   Mat          M, Mreuse;
1929   MatScalar   *vwork, *aa;
1930   MPI_Comm     comm;
1931   IS           isrow_new, iscol_new;
1932   Mat_SeqBAIJ *aij;
1933 
1934   PetscFunctionBegin;
1935   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1936   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1937   PetscCallMPI(MPI_Comm_size(comm, &size));
1938   /* The compression and expansion should be avoided: it does not report
1939      errors and may change the indices, hence it is buggy */
1940   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1941   PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1942 
1943   if (call == MAT_REUSE_MATRIX) {
1944     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1945     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1946     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
1947   } else {
1948     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
1949   }
1950   PetscCall(ISDestroy(&isrow_new));
1951   PetscCall(ISDestroy(&iscol_new));
1952   /*
1953       m - number of local rows
1954       n - number of columns (same on all processors)
1955       rstart - first row in new global matrix generated
1956   */
1957   PetscCall(MatGetBlockSize(mat, &bs));
1958   PetscCall(MatGetSize(Mreuse, &m, &n));
1959   m = m / bs;
1960   n = n / bs;
1961 
1962   if (call == MAT_INITIAL_MATRIX) {
1963     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1964     ii  = aij->i;
1965     jj  = aij->j;
1966 
1967     /*
1968         Determine the number of non-zeros in the diagonal and off-diagonal
1969         portions of the matrix in order to do correct preallocation
1970     */
1971 
1972     /* first get start and end of "diagonal" columns */
1973     if (csize == PETSC_DECIDE) {
1974       PetscCall(ISGetSize(isrow, &mglobal));
1975       if (mglobal == n * bs) { /* square matrix */
1976         nlocal = m;
1977       } else {
1978         nlocal = n / size + ((n % size) > rank);
1979       }
1980     } else {
1981       nlocal = csize / bs;
1982     }
1983     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1984     rstart = rend - nlocal;
1985     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1986 
1987     /* next, compute all the lengths */
1988     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1989     for (i = 0; i < m; i++) {
1990       jend = ii[i + 1] - ii[i];
1991       olen = 0;
1992       dlen = 0;
1993       for (j = 0; j < jend; j++) {
1994         if (*jj < rstart || *jj >= rend) olen++;
1995         else dlen++;
1996         jj++;
1997       }
1998       olens[i] = olen;
1999       dlens[i] = dlen;
2000     }
2001     PetscCall(MatCreate(comm, &M));
2002     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2003     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
2004     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2005     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2006     PetscCall(PetscFree2(dlens, olens));
2007   } else {
2008     PetscInt ml, nl;
2009 
2010     M = *newmat;
2011     PetscCall(MatGetLocalSize(M, &ml, &nl));
2012     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2013     PetscCall(MatZeroEntries(M));
2014     /*
2015          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2016        rather than the slower MatSetValues().
2017     */
2018     M->was_assembled = PETSC_TRUE;
2019     M->assembled     = PETSC_FALSE;
2020   }
2021   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2022   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2023   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2024   ii  = aij->i;
2025   jj  = aij->j;
2026   aa  = aij->a;
2027   for (i = 0; i < m; i++) {
2028     row   = rstart / bs + i;
2029     nz    = ii[i + 1] - ii[i];
2030     cwork = jj;
2031     jj += nz;
2032     vwork = aa;
2033     aa += nz * bs * bs;
2034     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2035   }
2036 
2037   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2038   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2039   *newmat = M;
2040 
2041   /* save submatrix used in processor for next request */
2042   if (call == MAT_INITIAL_MATRIX) {
2043     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2044     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2045   }
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2050 {
2051   MPI_Comm        comm, pcomm;
2052   PetscInt        clocal_size, nrows;
2053   const PetscInt *rows;
2054   PetscMPIInt     size;
2055   IS              crowp, lcolp;
2056 
2057   PetscFunctionBegin;
2058   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2059   /* make a collective version of 'rowp' */
2060   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2061   if (pcomm == comm) {
2062     crowp = rowp;
2063   } else {
2064     PetscCall(ISGetSize(rowp, &nrows));
2065     PetscCall(ISGetIndices(rowp, &rows));
2066     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2067     PetscCall(ISRestoreIndices(rowp, &rows));
2068   }
2069   PetscCall(ISSetPermutation(crowp));
2070   /* make a local version of 'colp' */
2071   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2072   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2073   if (size == 1) {
2074     lcolp = colp;
2075   } else {
2076     PetscCall(ISAllGather(colp, &lcolp));
2077   }
2078   PetscCall(ISSetPermutation(lcolp));
2079   /* now we just get the submatrix */
2080   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2081   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2082   /* clean up */
2083   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2084   if (size > 1) PetscCall(ISDestroy(&lcolp));
2085   PetscFunctionReturn(PETSC_SUCCESS);
2086 }
2087 
2088 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2089 {
2090   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2091   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2092 
2093   PetscFunctionBegin;
2094   if (nghosts) *nghosts = B->nbs;
2095   if (ghosts) *ghosts = baij->garray;
2096   PetscFunctionReturn(PETSC_SUCCESS);
2097 }
2098 
2099 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2100 {
2101   Mat          B;
2102   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2103   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2104   Mat_SeqAIJ  *b;
2105   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2106   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2107   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2108 
2109   PetscFunctionBegin;
2110   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2111   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2112 
2113   /*   Tell every processor the number of nonzeros per row  */
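  /*
     Each process fills only its own slice of lens; the in-place MPI_Allgatherv()
     below then replicates the full array on every process, with recvcounts and
     displs expressed in block rows.
  */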
2114   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2115   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2116   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2117   displs = recvcounts + size;
2118   for (i = 0; i < size; i++) {
2119     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2120     displs[i]     = A->rmap->range[i] / bs;
2121   }
2122   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2123   /* Create the sequential matrix of the same type as the local block diagonal  */
2124   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2125   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2126   PetscCall(MatSetType(B, MATSEQAIJ));
2127   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2128   b = (Mat_SeqAIJ *)B->data;
2129 
2130   /*     Copy my part of matrix column indices over  */
2131   sendcount  = ad->nz + bd->nz;
2132   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2133   a_jsendbuf = ad->j;
2134   b_jsendbuf = bd->j;
2135   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2136   cnt        = 0;
2137   for (i = 0; i < n; i++) {
2138     /* put in lower diagonal portion */
2139     m = bd->i[i + 1] - bd->i[i];
2140     while (m > 0) {
2141       /* is it above diagonal (in bd (compressed) numbering) */
2142       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2143       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2144       m--;
2145     }
2146 
2147     /* put in diagonal portion */
2148     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2149 
2150     /* put in upper diagonal portion */
2151     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2152   }
2153   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2154 
2155   /*  Gather all column indices to all processors  */
2156   for (i = 0; i < size; i++) {
2157     recvcounts[i] = 0;
2158     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2159   }
2160   displs[0] = 0;
2161   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2162   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2163   /*  Assemble the matrix into usable form (note the numerical values are not yet set)  */
2164   /* set the b->ilen (length of each row) values */
2165   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2166   /* set the b->i indices */
2167   b->i[0] = 0;
2168   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2169   PetscCall(PetscFree(lens));
2170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2172   PetscCall(PetscFree(recvcounts));
2173 
2174   PetscCall(MatPropagateSymmetryOptions(A, B));
2175   *newmat = B;
2176   PetscFunctionReturn(PETSC_SUCCESS);
2177 }
2178 
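/*
   Note that no truly parallel SOR is implemented here; the supported flags apply
   SOR to the local diagonal block only. For each outer iteration the ghost values
   of xx are refreshed, the locally corrected right-hand side bb1 = bb - B*x is
   formed (bb1 is allocated below whenever its > 1 or the initial guess is
   nonzero), and lits local sweeps are run on the diagonal block.
*/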
2179 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2180 {
2181   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2182   Vec          bb1 = NULL;
2183 
2184   PetscFunctionBegin;
2185   if (flag == SOR_APPLY_UPPER) {
2186     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2187     PetscFunctionReturn(PETSC_SUCCESS);
2188   }
2189 
2190   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2191 
2192   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2193     if (flag & SOR_ZERO_INITIAL_GUESS) {
2194       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2195       its--;
2196     }
2197 
2198     while (its--) {
2199       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2200       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2201 
2202       /* update rhs: bb1 = bb - B*x */
2203       PetscCall(VecScale(mat->lvec, -1.0));
2204       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2205 
2206       /* local sweep */
2207       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2208     }
2209   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2210     if (flag & SOR_ZERO_INITIAL_GUESS) {
2211       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2212       its--;
2213     }
2214     while (its--) {
2215       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2216       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2217 
2218       /* update rhs: bb1 = bb - B*x */
2219       PetscCall(VecScale(mat->lvec, -1.0));
2220       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2221 
2222       /* local sweep */
2223       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2224     }
2225   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2226     if (flag & SOR_ZERO_INITIAL_GUESS) {
2227       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2228       its--;
2229     }
2230     while (its--) {
2231       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2232       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2233 
2234       /* update rhs: bb1 = bb - B*x */
2235       PetscCall(VecScale(mat->lvec, -1.0));
2236       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2237 
2238       /* local sweep */
2239       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2240     }
2241   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR version not supported");
2242 
2243   PetscCall(VecDestroy(&bb1));
2244   PetscFunctionReturn(PETSC_SUCCESS);
2245 }
2246 
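/*
   Column reductions are accumulated in a work array of global length N: columns
   of the diagonal block land at A->cmap->rstart plus their local offset, columns
   of the off-diagonal block are mapped through garray, and one Allreduce
   (MPIU_MAX for NORM_INFINITY, MPIU_SUM otherwise) combines the contributions.
*/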
2247 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2248 {
2249   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2250   PetscInt     m, N, i, *garray = aij->garray;
2251   PetscInt     ib, jb, bs = A->rmap->bs;
2252   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2253   MatScalar   *a_val = a_aij->a;
2254   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2255   MatScalar   *b_val = b_aij->a;
2256   PetscReal   *work;
2257 
2258   PetscFunctionBegin;
2259   PetscCall(MatGetSize(A, &m, &N));
2260   PetscCall(PetscCalloc1(N, &work));
2261   if (type == NORM_2) {
2262     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2263       for (jb = 0; jb < bs; jb++) {
2264         for (ib = 0; ib < bs; ib++) {
2265           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2266           a_val++;
2267         }
2268       }
2269     }
2270     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2271       for (jb = 0; jb < bs; jb++) {
2272         for (ib = 0; ib < bs; ib++) {
2273           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2274           b_val++;
2275         }
2276       }
2277     }
2278   } else if (type == NORM_1) {
2279     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2280       for (jb = 0; jb < bs; jb++) {
2281         for (ib = 0; ib < bs; ib++) {
2282           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2283           a_val++;
2284         }
2285       }
2286     }
2287     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2288       for (jb = 0; jb < bs; jb++) {
2289         for (ib = 0; ib < bs; ib++) {
2290           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2291           b_val++;
2292         }
2293       }
2294     }
2295   } else if (type == NORM_INFINITY) {
2296     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2297       for (jb = 0; jb < bs; jb++) {
2298         for (ib = 0; ib < bs; ib++) {
2299           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2300           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2301           a_val++;
2302         }
2303       }
2304     }
2305     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2306       for (jb = 0; jb < bs; jb++) {
2307         for (ib = 0; ib < bs; ib++) {
2308           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2309           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2310           b_val++;
2311         }
2312       }
2313     }
2314   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2315     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2316       for (jb = 0; jb < bs; jb++) {
2317         for (ib = 0; ib < bs; ib++) {
2318           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2319           a_val++;
2320         }
2321       }
2322     }
2323     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2324       for (jb = 0; jb < bs; jb++) {
2325         for (ib = 0; ib < bs; ib++) {
2326           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2327           b_val++;
2328         }
2329       }
2330     }
2331   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2332     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2333       for (jb = 0; jb < bs; jb++) {
2334         for (ib = 0; ib < bs; ib++) {
2335           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2336           a_val++;
2337         }
2338       }
2339     }
2340     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2341       for (jb = 0; jb < bs; jb++) {
2342         for (ib = 0; ib < bs; ib++) {
2343           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2344           b_val++;
2345         }
2346       }
2347     }
2348   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2349   if (type == NORM_INFINITY) {
2350     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2351   } else {
2352     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2353   }
2354   PetscCall(PetscFree(work));
2355   if (type == NORM_2) {
2356     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2357   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2358     for (i = 0; i < N; i++) reductions[i] /= m;
2359   }
2360   PetscFunctionReturn(PETSC_SUCCESS);
2361 }
2362 
2363 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2364 {
2365   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2366 
2367   PetscFunctionBegin;
2368   PetscCall(MatInvertBlockDiagonal(a->A, values));
2369   A->factorerrortype             = a->A->factorerrortype;
2370   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2371   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2372   PetscFunctionReturn(PETSC_SUCCESS);
2373 }
2374 
2375 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2376 {
2377   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2378   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2379 
2380   PetscFunctionBegin;
2381   if (!Y->preallocated) {
2382     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2383   } else if (!aij->nz) {
2384     PetscInt nonew = aij->nonew;
2385     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2386     aij->nonew = nonew;
2387   }
2388   PetscCall(MatShift_Basic(Y, a));
2389   PetscFunctionReturn(PETSC_SUCCESS);
2390 }
2391 
2392 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2393 {
2394   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2395 
2396   PetscFunctionBegin;
2397   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2398   PetscCall(MatMissingDiagonal(a->A, missing, d));
2399   if (d) {
2400     PetscInt rstart;
2401     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2402     *d += rstart / A->rmap->bs;
2403   }
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2408 {
2409   PetscFunctionBegin;
2410   *a = ((Mat_MPIBAIJ *)A->data)->A;
2411   PetscFunctionReturn(PETSC_SUCCESS);
2412 }
2413 
2414 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2415                                        MatGetRow_MPIBAIJ,
2416                                        MatRestoreRow_MPIBAIJ,
2417                                        MatMult_MPIBAIJ,
2418                                        /* 4*/ MatMultAdd_MPIBAIJ,
2419                                        MatMultTranspose_MPIBAIJ,
2420                                        MatMultTransposeAdd_MPIBAIJ,
2421                                        NULL,
2422                                        NULL,
2423                                        NULL,
2424                                        /*10*/ NULL,
2425                                        NULL,
2426                                        NULL,
2427                                        MatSOR_MPIBAIJ,
2428                                        MatTranspose_MPIBAIJ,
2429                                        /*15*/ MatGetInfo_MPIBAIJ,
2430                                        MatEqual_MPIBAIJ,
2431                                        MatGetDiagonal_MPIBAIJ,
2432                                        MatDiagonalScale_MPIBAIJ,
2433                                        MatNorm_MPIBAIJ,
2434                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2435                                        MatAssemblyEnd_MPIBAIJ,
2436                                        MatSetOption_MPIBAIJ,
2437                                        MatZeroEntries_MPIBAIJ,
2438                                        /*24*/ MatZeroRows_MPIBAIJ,
2439                                        NULL,
2440                                        NULL,
2441                                        NULL,
2442                                        NULL,
2443                                        /*29*/ MatSetUp_MPI_Hash,
2444                                        NULL,
2445                                        NULL,
2446                                        MatGetDiagonalBlock_MPIBAIJ,
2447                                        NULL,
2448                                        /*34*/ MatDuplicate_MPIBAIJ,
2449                                        NULL,
2450                                        NULL,
2451                                        NULL,
2452                                        NULL,
2453                                        /*39*/ MatAXPY_MPIBAIJ,
2454                                        MatCreateSubMatrices_MPIBAIJ,
2455                                        MatIncreaseOverlap_MPIBAIJ,
2456                                        MatGetValues_MPIBAIJ,
2457                                        MatCopy_MPIBAIJ,
2458                                        /*44*/ NULL,
2459                                        MatScale_MPIBAIJ,
2460                                        MatShift_MPIBAIJ,
2461                                        NULL,
2462                                        MatZeroRowsColumns_MPIBAIJ,
2463                                        /*49*/ NULL,
2464                                        NULL,
2465                                        NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2469                                        NULL,
2470                                        MatSetUnfactored_MPIBAIJ,
2471                                        MatPermute_MPIBAIJ,
2472                                        MatSetValuesBlocked_MPIBAIJ,
2473                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2474                                        MatDestroy_MPIBAIJ,
2475                                        MatView_MPIBAIJ,
2476                                        NULL,
2477                                        NULL,
2478                                        /*64*/ NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2484                                        NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        /*74*/ NULL,
2489                                        MatFDColoringApply_BAIJ,
2490                                        NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        /*79*/ NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        MatLoad_MPIBAIJ,
2498                                        /*84*/ NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        /*89*/ NULL,
2504                                        NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        /*94*/ NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        NULL,
2513                                        /*99*/ NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        MatConjugate_MPIBAIJ,
2517                                        NULL,
2518                                        /*104*/ NULL,
2519                                        MatRealPart_MPIBAIJ,
2520                                        MatImaginaryPart_MPIBAIJ,
2521                                        NULL,
2522                                        NULL,
2523                                        /*109*/ NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        MatMissingDiagonal_MPIBAIJ,
2528                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2529                                        NULL,
2530                                        MatGetGhosts_MPIBAIJ,
2531                                        NULL,
2532                                        NULL,
2533                                        /*119*/ NULL,
2534                                        NULL,
2535                                        NULL,
2536                                        NULL,
2537                                        MatGetMultiProcBlock_MPIBAIJ,
2538                                        /*124*/ NULL,
2539                                        MatGetColumnReductions_MPIBAIJ,
2540                                        MatInvertBlockDiagonal_MPIBAIJ,
2541                                        NULL,
2542                                        NULL,
2543                                        /*129*/ NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        NULL,
2547                                        NULL,
2548                                        /*134*/ NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        NULL,
2552                                        NULL,
2553                                        /*139*/ MatSetBlockSizes_Default,
2554                                        NULL,
2555                                        NULL,
2556                                        MatFDColoringSetUp_MPIXAIJ,
2557                                        NULL,
2558                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        /*150*/ NULL,
2565                                        NULL};
2566 
2567 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2568 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2569 
2570 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2571 {
2572   PetscInt        m, rstart, cstart, cend;
2573   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2574   const PetscInt *JJ          = NULL;
2575   PetscScalar    *values      = NULL;
2576   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2577   PetscBool       nooffprocentries;
2578 
2579   PetscFunctionBegin;
2580   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2581   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2582   PetscCall(PetscLayoutSetUp(B->rmap));
2583   PetscCall(PetscLayoutSetUp(B->cmap));
2584   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2585   m      = B->rmap->n / bs;
2586   rstart = B->rmap->rstart / bs;
2587   cstart = B->cmap->rstart / bs;
2588   cend   = B->cmap->rend / bs;
2589 
2590   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2591   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2592   for (i = 0; i < m; i++) {
2593     nz = ii[i + 1] - ii[i];
2594     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2595     nz_max = PetscMax(nz_max, nz);
2596     dlen   = 0;
2597     olen   = 0;
2598     JJ     = jj + ii[i];
2599     for (j = 0; j < nz; j++) {
2600       if (*JJ < cstart || *JJ >= cend) olen++;
2601       else dlen++;
2602       JJ++;
2603     }
2604     d_nnz[i] = dlen;
2605     o_nnz[i] = olen;
2606   }
2607   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2608   PetscCall(PetscFree2(d_nnz, o_nnz));
2609 
2610   values = (PetscScalar *)V;
2611   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2612   for (i = 0; i < m; i++) {
2613     PetscInt        row   = i + rstart;
2614     PetscInt        ncols = ii[i + 1] - ii[i];
2615     const PetscInt *icols = jj + ii[i];
2616     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2617       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2618       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2619     } else { /* block ordering does not match so we can only insert one block at a time. */
2620       PetscInt j;
2621       for (j = 0; j < ncols; j++) {
2622         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2623         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2624       }
2625     }
2626   }
2627 
2628   if (!V) PetscCall(PetscFree(values));
2629   nooffprocentries    = B->nooffprocentries;
2630   B->nooffprocentries = PETSC_TRUE;
2631   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2632   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2633   B->nooffprocentries = nooffprocentries;
2634 
2635   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 /*@C
2640    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2641 
2642    Collective
2643 
2644    Input Parameters:
2645 +  B - the matrix
2646 .  bs - the block size
2647 .  i - the indices into j for the start of each local row (starts with zero)
2648 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2649 -  v - optional values in the matrix
2650 
2651    Level: advanced
2652 
2653    Notes:
2654     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2655    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2656    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2657    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2658    block and the second index is over columns within a block.
2659 
2660    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
2661 
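   Example:
   A minimal sketch, assuming a single MPI process; the block CSR arrays below are illustrative, not taken from this source.
   One local block row (bs = 2) with blocks in block columns 0 and 1 of a 2 x 4 matrix:
.vb
   Mat         B;
   PetscInt    i[] = {0, 2};                   /* block row 0 holds two blocks */
   PetscInt    j[] = {0, 1};                   /* in block columns 0 and 1 */
   PetscScalar v[] = {1, 2, 3, 4, 5, 6, 7, 8}; /* bs*bs values for each block */
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, 2, PETSC_DECIDE, PETSC_DETERMINE, 4));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, 2, i, j, v));
.ve
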
2662 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2663 @*/
2664 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2665 {
2666   PetscFunctionBegin;
2667   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2668   PetscValidType(B, 1);
2669   PetscValidLogicalCollectiveInt(B, bs, 2);
2670   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2671   PetscFunctionReturn(PETSC_SUCCESS);
2672 }
2673 
2674 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2675 {
2676   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2677   PetscInt     i;
2678   PetscMPIInt  size;
2679 
2680   PetscFunctionBegin;
2681   if (B->hash_active) {
2682     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
2683     B->hash_active = PETSC_FALSE;
2684   }
2685   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2686   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2687   PetscCall(PetscLayoutSetUp(B->rmap));
2688   PetscCall(PetscLayoutSetUp(B->cmap));
2689   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2690 
2691   if (d_nnz) {
2692     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2693   }
2694   if (o_nnz) {
2695     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2696   }
2697 
2698   b->bs2 = bs * bs;
2699   b->mbs = B->rmap->n / bs;
2700   b->nbs = B->cmap->n / bs;
2701   b->Mbs = B->rmap->N / bs;
2702   b->Nbs = B->cmap->N / bs;
2703 
2704   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2705   b->rstartbs = B->rmap->rstart / bs;
2706   b->rendbs   = B->rmap->rend / bs;
2707   b->cstartbs = B->cmap->rstart / bs;
2708   b->cendbs   = B->cmap->rend / bs;
2709 
2710 #if defined(PETSC_USE_CTABLE)
2711   PetscCall(PetscHMapIDestroy(&b->colmap));
2712 #else
2713   PetscCall(PetscFree(b->colmap));
2714 #endif
2715   PetscCall(PetscFree(b->garray));
2716   PetscCall(VecDestroy(&b->lvec));
2717   PetscCall(VecScatterDestroy(&b->Mvctx));
2718 
2719   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2720   PetscCall(MatDestroy(&b->B));
2721   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2722   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2723   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2724 
2725   PetscCall(MatDestroy(&b->A));
2726   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2727   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2728   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2729 
2730   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2731   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2732   B->preallocated  = PETSC_TRUE;
2733   B->was_assembled = PETSC_FALSE;
2734   B->assembled     = PETSC_FALSE;
2735   PetscFunctionReturn(PETSC_SUCCESS);
2736 }
2737 
2738 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2739 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2740 
2741 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2742 {
2743   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2744   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2745   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2746   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2747 
2748   PetscFunctionBegin;
2749   PetscCall(PetscMalloc1(M + 1, &ii));
2750   ii[0] = 0;
2751   for (i = 0; i < M; i++) {
2752     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2753     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2754     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2755     /* remove one from the count if this row contains a diagonal entry */
2756     for (j = id[i]; j < id[i + 1]; j++) {
2757       if (jd[j] == i) {
2758         ii[i + 1]--;
2759         break;
2760       }
2761     }
2762   }
2763   PetscCall(PetscMalloc1(ii[M], &jj));
2764   cnt = 0;
2765   for (i = 0; i < M; i++) {
2766     for (j = io[i]; j < io[i + 1]; j++) {
2767       if (garray[jo[j]] > rstart) break;
2768       jj[cnt++] = garray[jo[j]];
2769     }
2770     for (k = id[i]; k < id[i + 1]; k++) {
2771       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2772     }
2773     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2774   }
2775   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2776   PetscFunctionReturn(PETSC_SUCCESS);
2777 }
2778 
2779 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2780 
2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2782 
2783 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2784 {
2785   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2786   Mat_MPIAIJ  *b;
2787   Mat          B;
2788 
2789   PetscFunctionBegin;
2790   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2791 
2792   if (reuse == MAT_REUSE_MATRIX) {
2793     B = *newmat;
2794   } else {
2795     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2796     PetscCall(MatSetType(B, MATMPIAIJ));
2797     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2798     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2799     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2800     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2801   }
2802   b = (Mat_MPIAIJ *)B->data;
2803 
2804   if (reuse == MAT_REUSE_MATRIX) {
2805     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2806     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2807   } else {
2808     PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
2809     PetscCall(MatDestroy(&b->A));
2810     PetscCall(MatDestroy(&b->B));
2811     PetscCall(MatDisAssemble_MPIBAIJ(A));
2812     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2813     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2814     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2815     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2816     A->symmetric              = sym;
2817     A->hermitian              = hermitian;
2818     A->structurally_symmetric = structurally_symmetric;
2819     A->spd                    = spd;
2820   }
2821   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2822   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2823 
2824   if (reuse == MAT_INPLACE_MATRIX) {
2825     PetscCall(MatHeaderReplace(A, &B));
2826   } else {
2827     *newmat = B;
2828   }
2829   PetscFunctionReturn(PETSC_SUCCESS);
2830 }
2831 
2832 /*MC
2833    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2834 
2835    Options Database Keys:
2836 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2837 . -mat_block_size <bs> - set the blocksize used to store the matrix
2838 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2839 - -mat_use_hash_table <fact> - set hash table factor
2840 
2841    Level: beginner
2842 
2843    Note:
2844     `MatSetOption`(mat, `MAT_STRUCTURE_ONLY`, `PETSC_TRUE`) may be called for this matrix type. In this case no
2845     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
2846 
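   Example:
   A command-line sketch using the options above (the executable name is illustrative):
.vb
   ./myapp -mat_type mpibaij -mat_block_size 3
.ve
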
2847 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2848 M*/
2849 
2850 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2851 
2852 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2853 {
2854   Mat_MPIBAIJ *b;
2855   PetscBool    flg = PETSC_FALSE;
2856 
2857   PetscFunctionBegin;
2858   PetscCall(PetscNew(&b));
2859   B->data = (void *)b;
2860 
2861   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2862   B->assembled = PETSC_FALSE;
2863 
2864   B->insertmode = NOT_SET_VALUES;
2865   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2866   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2867 
2868   /* build local table of row and column ownerships */
2869   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2870 
2871   /* build cache for off array entries formed */
2872   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2873 
2874   b->donotstash  = PETSC_FALSE;
2875   b->colmap      = NULL;
2876   b->garray      = NULL;
2877   b->roworiented = PETSC_TRUE;
2878 
2879   /* stuff used in block assembly */
2880   b->barray = NULL;
2881 
2882   /* stuff used for matrix vector multiply */
2883   b->lvec  = NULL;
2884   b->Mvctx = NULL;
2885 
2886   /* stuff for MatGetRow() */
2887   b->rowindices   = NULL;
2888   b->rowvalues    = NULL;
2889   b->getrowactive = PETSC_FALSE;
2890 
2891   /* hash table stuff */
2892   b->ht           = NULL;
2893   b->hd           = NULL;
2894   b->ht_size      = 0;
2895   b->ht_flag      = PETSC_FALSE;
2896   b->ht_fact      = 0;
2897   b->ht_total_ct  = 0;
2898   b->ht_insert_ct = 0;
2899 
2900   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2901   b->ijonly = PETSC_FALSE;
2902 
2903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2906 #if defined(PETSC_HAVE_HYPRE)
2907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2908 #endif
2909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2916   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2917 
2918   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix", "Mat");
2919   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2920   if (flg) {
2921     PetscReal fact = 1.39;
2922     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2923     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2924     if (fact <= 1.0) fact = 1.39;
2925     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2926     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2927   }
2928   PetscOptionsEnd();
2929   PetscFunctionReturn(PETSC_SUCCESS);
2930 }
2931 
2932 /*MC
2933    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2934 
2935    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2936    and `MATMPIBAIJ` otherwise.
2937 
2938    Options Database Keys:
2939 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2940 
2941   Level: beginner
2942 
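   Example:
   A sketch of selecting this type at runtime; `MatSetFromOptions()` then allows -mat_type to override it:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetType(A, MATBAIJ));
   PetscCall(MatSetFromOptions(A));
.ve
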
2943 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2944 M*/
2945 
2946 /*@C
2947    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2948    (block compressed row).
2949 
2950    Collective
2951 
2952    Input Parameters:
2953 +  B - the matrix
2954 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2955           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2956 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2957            submatrix  (same for all local rows)
2958 .  d_nnz - array containing the number of block nonzeros in the various block rows
2959            of the diagonal portion of the local submatrix (possibly different for each block
2960            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2961            set it even if it is zero.
2962 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2963            submatrix (same for all local rows).
2964 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2965            off-diagonal portion of the local submatrix (possibly different for
2966            each block row) or `NULL`.
2967 
2968    If the *_nnz parameter is given then the *_nz parameter is ignored
2969 
2970    Options Database Keys:
2971 +   -mat_block_size - size of the blocks to use
2972 -   -mat_use_hash_table <fact> - set hash table factor
2973 
2974    Level: intermediate
2975 
2976    Notes:
2977    For good matrix assembly performance
2978    the user should preallocate the matrix storage by setting the parameters
2979    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2980    performance can be increased by more than a factor of 50.
2981 
2982    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2983    then it must be used on all processors that share the object for that argument.
2984 
2985    Storage Information:
2986    For a square global matrix we define each processor's diagonal portion
2987    to be its local rows and the corresponding columns (a square submatrix);
2988    each processor's off-diagonal portion encompasses the remainder of the
2989    local matrix (a rectangular submatrix).
2990 
2991    The user can specify preallocated storage for the diagonal part of
2992    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2993    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2994    memory allocation.  Likewise, specify preallocated storage for the
2995    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2996 
2997    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2998    the figure below we depict these three local rows and all columns (0-11).
2999 
3000 .vb
3001            0 1 2 3 4 5 6 7 8 9 10 11
3002           --------------------------
3003    row 3  |o o o d d d o o o o  o  o
3004    row 4  |o o o d d d o o o o  o  o
3005    row 5  |o o o d d d o o o o  o  o
3006           --------------------------
3007 .ve
3008 
3009    Thus, any entries in the d locations are stored in the d (diagonal)
3010    submatrix, and any entries in the o locations are stored in the
3011    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3012    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3013 
3014    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3015    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3016    In general, for PDE problems in which most nonzeros are near the diagonal,
3017    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3018    or you will get TERRIBLE performance; see the users' manual chapter on
3019    matrices.
3020 
3021    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3022    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3023    You can also run with the option `-info` and look for messages with the string
3024    malloc in them to see if additional memory allocation was needed.
3025 
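   Example:
   A minimal sketch; the block size, preallocation counts, and local sizes m and n below are illustrative:
.vb
   Mat B;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocation(B, 2, 5, NULL, 2, NULL));
.ve
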
3026 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3027 @*/
3028 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3029 {
3030   PetscFunctionBegin;
3031   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3032   PetscValidType(B, 1);
3033   PetscValidLogicalCollectiveInt(B, bs, 2);
3034   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3035   PetscFunctionReturn(PETSC_SUCCESS);
3036 }
3037 
3038 /*@C
3039    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3040    (block compressed row).
3041 
3042    Collective
3043 
3044    Input Parameters:
3045 +  comm - MPI communicator
3046 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3047           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3048 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3049            This value should be the same as the local size used in creating the
3050            y vector for the matrix-vector product y = Ax.
3051 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3052            This value should be the same as the local size used in creating the
3053            x vector for the matrix-vector product y = Ax.
3054 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
3055 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
3056 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3057            submatrix  (same for all local rows)
3058 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3059            of the diagonal portion of the local submatrix (possibly different for each block
3060            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3061            and set it even if it is zero.
3062 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3063            submatrix (same for all local rows).
3064 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3065            off-diagonal portion of the local submatrix (possibly different for
3066            each block row) or NULL.
3067 
3068    Output Parameter:
3069 .  A - the matrix
3070 
3071    Options Database Keys:
3072 +   -mat_block_size - size of the blocks to use
3073 -   -mat_use_hash_table <fact> - set hash table factor
3074 
3075    Level: intermediate
3076 
3077    Notes:
3078    For good matrix assembly performance
3079    the user should preallocate the matrix storage by setting the parameters
3080    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3081    performance can be increased by more than a factor of 50.
3082 
3083    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3084    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3085    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3086 
3087    If the *_nnz parameter is given then the *_nz parameter is ignored
3088 
3089    A nonzero block is any block that has 1 or more nonzeros in it
3090 
3091    The user MUST specify either the local or global matrix dimensions
3092    (possibly both).
3093 
3094    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3095    then it must be used on all processors that share the object for that argument.
3096 
3097    Storage Information:
3098    For a square global matrix we define each processor's diagonal portion
3099    to be its local rows and the corresponding columns (a square submatrix);
3100    each processor's off-diagonal portion encompasses the remainder of the
3101    local matrix (a rectangular submatrix).
3102 
3103    The user can specify preallocated storage for the diagonal part of
3104    the local submatrix with either d_nz or d_nnz (not both).  Set
3105    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3106    memory allocation.  Likewise, specify preallocated storage for the
3107    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3108 
3109    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3110    the figure below we depict these three local rows and all columns (0-11).
3111 
3112 .vb
3113            0 1 2 3 4 5 6 7 8 9 10 11
3114           --------------------------
3115    row 3  |o o o d d d o o o o  o  o
3116    row 4  |o o o d d d o o o o  o  o
3117    row 5  |o o o d d d o o o o  o  o
3118           --------------------------
3119 .ve
3120 
3121    Thus, any entries in the d locations are stored in the d (diagonal)
3122    submatrix, and any entries in the o locations are stored in the
3123    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3124    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3125 
3126    Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3127    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3128    In general, for PDE problems in which most nonzeros are near the diagonal,
3129    one expects `d_nz` >> `o_nz`.   For large problems you MUST preallocate memory
3130    or you will get TERRIBLE performance; see the users' manual chapter on
3131    matrices.
3132 
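   Example:
   A sketch under illustrative sizes: a 12 x 12 global matrix with 3 x 3 blocks, each of two processes owning 6 rows,
   with at most 2 diagonal-portion and 1 off-diagonal-portion block nonzeros per block row:
.vb
   Mat A;
   PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 3, 6, 6, 12, 12, 2, NULL, 1, NULL, &A));
.ve
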
3133 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3134 @*/
3135 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3136 {
3137   PetscMPIInt size;
3138 
3139   PetscFunctionBegin;
3140   PetscCall(MatCreate(comm, A));
3141   PetscCall(MatSetSizes(*A, m, n, M, N));
3142   PetscCallMPI(MPI_Comm_size(comm, &size));
3143   if (size > 1) {
3144     PetscCall(MatSetType(*A, MATMPIBAIJ));
3145     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3146   } else {
3147     PetscCall(MatSetType(*A, MATSEQBAIJ));
3148     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3149   }
3150   PetscFunctionReturn(PETSC_SUCCESS);
3151 }
3152 
3153 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3154 {
3155   Mat          mat;
3156   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3157   PetscInt     len = 0;
3158 
3159   PetscFunctionBegin;
3160   *newmat = NULL;
3161   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3162   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3163   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3164 
3165   mat->factortype   = matin->factortype;
3166   mat->preallocated = PETSC_TRUE;
3167   mat->assembled    = PETSC_TRUE;
3168   mat->insertmode   = NOT_SET_VALUES;
3169 
3170   a             = (Mat_MPIBAIJ *)mat->data;
3171   mat->rmap->bs = matin->rmap->bs;
3172   a->bs2        = oldmat->bs2;
3173   a->mbs        = oldmat->mbs;
3174   a->nbs        = oldmat->nbs;
3175   a->Mbs        = oldmat->Mbs;
3176   a->Nbs        = oldmat->Nbs;
3177 
3178   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3179   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3180 
3181   a->size         = oldmat->size;
3182   a->rank         = oldmat->rank;
3183   a->donotstash   = oldmat->donotstash;
3184   a->roworiented  = oldmat->roworiented;
3185   a->rowindices   = NULL;
3186   a->rowvalues    = NULL;
3187   a->getrowactive = PETSC_FALSE;
3188   a->barray       = NULL;
3189   a->rstartbs     = oldmat->rstartbs;
3190   a->rendbs       = oldmat->rendbs;
3191   a->cstartbs     = oldmat->cstartbs;
3192   a->cendbs       = oldmat->cendbs;
3193 
3194   /* hash table stuff */
3195   a->ht           = NULL;
3196   a->hd           = NULL;
3197   a->ht_size      = 0;
3198   a->ht_flag      = oldmat->ht_flag;
3199   a->ht_fact      = oldmat->ht_fact;
3200   a->ht_total_ct  = 0;
3201   a->ht_insert_ct = 0;
3202 
3203   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3204   if (oldmat->colmap) {
3205 #if defined(PETSC_USE_CTABLE)
3206     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3207 #else
3208     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3209     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3210 #endif
3211   } else a->colmap = NULL;
3212 
3213   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3214     PetscCall(PetscMalloc1(len, &a->garray));
3215     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3216   } else a->garray = NULL;
3217 
3218   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3219   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3220   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3221 
3222   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3223   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3224   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3225   *newmat = mat;
3226   PetscFunctionReturn(PETSC_SUCCESS);
3227 }
3228 
3229 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3230 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3231 {
3232   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3233   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3234   PetscScalar *matvals;
3235 
3236   PetscFunctionBegin;
3237   PetscCall(PetscViewerSetUp(viewer));
3238 
3239   /* read in matrix header */
3240   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3241   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3242   M  = header[1];
3243   N  = header[2];
3244   nz = header[3];
3245   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3246   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3247   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3248 
3249   /* set block sizes from the viewer's .info file */
3250   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3251   /* set local sizes if not set already */
3252   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3253   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3254   /* set global sizes if not set already */
3255   if (mat->rmap->N < 0) mat->rmap->N = M;
3256   if (mat->cmap->N < 0) mat->cmap->N = N;
3257   PetscCall(PetscLayoutSetUp(mat->rmap));
3258   PetscCall(PetscLayoutSetUp(mat->cmap));
3259 
3260   /* check if the matrix sizes are correct */
3261   PetscCall(MatGetSize(mat, &rows, &cols));
3262   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3263   PetscCall(MatGetBlockSize(mat, &bs));
3264   PetscCall(MatGetLocalSize(mat, &m, &n));
3265   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3266   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3267   mbs = m / bs;
3268   nbs = n / bs;
3269 
3270   /* read in row lengths and build row indices */
3271   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3272   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3273   rowidxs[0] = 0;
3274   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3275   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3276   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3277 
3278   /* read in column indices and matrix values */
3279   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3280   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3281   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3282 
3283   {                /* preallocate matrix storage */
3284     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3285     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3286     PetscBool  sbaij, done;
3287     PetscInt  *d_nnz, *o_nnz;
3288 
3289     PetscCall(PetscBTCreate(nbs, &bt));
3290     PetscCall(PetscHSetICreate(&ht));
3291     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3292     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3293     for (i = 0; i < mbs; i++) {
3294       PetscCall(PetscBTMemzero(nbs, bt));
3295       PetscCall(PetscHSetIClear(ht));
3296       for (k = 0; k < bs; k++) {
3297         PetscInt row = bs * i + k;
3298         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3299           PetscInt col = colidxs[j];
3300           if (!sbaij || col >= row) {
3301             if (col >= cs && col < ce) {
3302               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3303             } else {
3304               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3305               if (done) o_nnz[i]++;
3306             }
3307           }
3308         }
3309       }
3310     }
3311     PetscCall(PetscBTDestroy(&bt));
3312     PetscCall(PetscHSetIDestroy(&ht));
3313     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3314     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3315     PetscCall(PetscFree2(d_nnz, o_nnz));
3316   }
3317 
3318   /* store matrix values */
3319   for (i = 0; i < m; i++) {
3320     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3321     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3322   }
3323 
3324   PetscCall(PetscFree(rowidxs));
3325   PetscCall(PetscFree2(colidxs, matvals));
3326   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3327   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3328   PetscFunctionReturn(PETSC_SUCCESS);
3329 }
3330 
3331 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3332 {
3333   PetscBool isbinary;
3334 
3335   PetscFunctionBegin;
3336   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3337   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3338   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3339   PetscFunctionReturn(PETSC_SUCCESS);
3340 }
3341 
3342 /*@
3343    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3344 
3345    Input Parameters:
3346 +  mat  - the matrix
3347 -  fact - factor
3348 
3349    Options Database Key:
3350 .  -mat_use_hash_table <fact> - provide the factor
3351 
3352    Level: advanced
3353 
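   Example:
   A sketch; the factor value is illustrative:
.vb
   PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
   PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve
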
3354 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3355 @*/
3356 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3357 {
3358   PetscFunctionBegin;
3359   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3360   PetscFunctionReturn(PETSC_SUCCESS);
3361 }
3362 
3363 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3364 {
3365   Mat_MPIBAIJ *baij;
3366 
3367   PetscFunctionBegin;
3368   baij          = (Mat_MPIBAIJ *)mat->data;
3369   baij->ht_fact = fact;
3370   PetscFunctionReturn(PETSC_SUCCESS);
3371 }
3372 
3373 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3374 {
3375   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3376   PetscBool    flg;
3377 
3378   PetscFunctionBegin;
3379   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3380   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3381   if (Ad) *Ad = a->A;
3382   if (Ao) *Ao = a->B;
3383   if (colmap) *colmap = a->garray;
3384   PetscFunctionReturn(PETSC_SUCCESS);
3385 }
3386 
3387 /*
3388     Special version for direct calls from Fortran (to eliminate two function call overheads)
3389 */
3390 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3391   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3392 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3393   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3394 #endif
3395 
3396 /*@C
3397   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3398 
3399   Collective
3400 
3401   Input Parameters:
3402 + mat - the matrix
3403 . min - number of input rows
3404 . im - input rows
3405 . nin - number of input columns
3406 . in - input columns
3407 . v - numerical values input
3408 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3409 
3410   Developer Note:
3411     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3412 
3413   Level: advanced
3414 
3415 .seealso: `Mat`, `MatSetValuesBlocked()`
3416 @*/
3417 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3418 {
3419   /* convert input arguments to C version */
3420   Mat        mat = *matin;
3421   PetscInt   m = *min, n = *nin;
3422   InsertMode addv = *addvin;
3423 
3424   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3425   const MatScalar *value;
3426   MatScalar       *barray      = baij->barray;
3427   PetscBool        roworiented = baij->roworiented;
3428   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3429   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3430   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3431 
3432   PetscFunctionBegin;
3433   /* tasks normally handled by MatSetValuesBlocked() */
3434   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3435   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3436   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3437   if (mat->assembled) {
3438     mat->was_assembled = PETSC_TRUE;
3439     mat->assembled     = PETSC_FALSE;
3440   }
3441   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3442 
3443   if (!barray) {
3444     PetscCall(PetscMalloc1(bs2, &barray));
3445     baij->barray = barray;
3446   }
3447 
3448   if (roworiented) stepval = (n - 1) * bs;
3449   else stepval = (m - 1) * bs;
3450 
3451   for (i = 0; i < m; i++) {
3452     if (im[i] < 0) continue;
3453     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3454     if (im[i] >= rstart && im[i] < rend) {
3455       row = im[i] - rstart;
3456       for (j = 0; j < n; j++) {
3457         /* If NumCol = 1 then a copy is not required */
3458         if ((roworiented) && (n == 1)) {
3459           barray = (MatScalar *)v + i * bs2;
3460         } else if ((!roworiented) && (m == 1)) {
3461           barray = (MatScalar *)v + j * bs2;
3462         } else { /* Here a copy is required */
3463           if (roworiented) {
3464             value = v + i * (stepval + bs) * bs + j * bs;
3465           } else {
3466             value = v + j * (stepval + bs) * bs + i * bs;
3467           }
3468           for (ii = 0; ii < bs; ii++, value += stepval) {
3469             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3470           }
3471           barray -= bs2;
3472         }
3473 
3474         if (in[j] >= cstart && in[j] < cend) {
3475           col = in[j] - cstart;
3476           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3477         } else if (in[j] < 0) {
3478           continue;
3479         } else {
3480           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3481           if (mat->was_assembled) {
3482             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3483 
3484 #if defined(PETSC_USE_DEBUG)
3485   #if defined(PETSC_USE_CTABLE)
3486             {
3487               PetscInt data;
3488               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3489               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3490             }
3491   #else
3492             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3493   #endif
3494 #endif
3495 #if defined(PETSC_USE_CTABLE)
3496             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3497             col = (col - 1) / bs;
3498 #else
3499             col = (baij->colmap[in[j]] - 1) / bs;
3500 #endif
3501             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
3502               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3503               col = in[j];
3504             }
3505           } else col = in[j];
3506           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3507         }
3508       }
3509     } else {
3510       if (!baij->donotstash) {
3511         if (roworiented) {
3512           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3513         } else {
3514           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3515         }
3516       }
3517     }
3518   }
3519 
3520   /* task normally handled by MatSetValuesBlocked() */
3521   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3522   PetscFunctionReturn(PETSC_SUCCESS);
3523 }
3524 
3525 /*@
3526      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3527          CSR format the local rows.
3528 
3529    Collective
3530 
3531    Input Parameters:
3532 +  comm - MPI communicator
3533 .  bs - the block size, only a block size of 1 is supported
3534 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3535 .  n - This value should be the same as the local size used in creating the
3536        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
3537        calculated if N is given). For square matrices n is almost always m.
3538 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3539 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3540 .   i - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3541 .   j - column indices
3542 -   a - matrix values
3543 
3544    Output Parameter:
3545 .   mat - the matrix
3546 
3547    Level: intermediate
3548 
3549    Notes:
3550        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3551      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3552      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3553 
3554      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3555      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3556      block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3557      with column-major ordering within blocks.
3558 
3559        The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
3560 
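   Example:
   A minimal sketch, assuming a single MPI process and bs = 1 (per the restriction above); the arrays are illustrative:
.vb
   Mat         mat;
   PetscInt    i[] = {0, 2};     /* one local row with two entries */
   PetscInt    j[] = {0, 1};     /* in columns 0 and 1 */
   PetscScalar a[] = {1.0, 2.0};
   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 1, PETSC_DECIDE, PETSC_DETERMINE, 2, i, j, a, &mat));
.ve
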
3561 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3562           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3563 @*/
3564 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3565 {
3566   PetscFunctionBegin;
3567   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3568   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3569   PetscCall(MatCreate(comm, mat));
3570   PetscCall(MatSetSizes(*mat, m, n, M, N));
3571   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3572   PetscCall(MatSetBlockSize(*mat, bs));
3573   PetscCall(MatSetUp(*mat));
3574   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3575   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3576   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3577   PetscFunctionReturn(PETSC_SUCCESS);
3578 }
3579 
3580 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3581 {
3582   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3583   PetscInt    *indx;
3584   PetscScalar *values;
3585 
3586   PetscFunctionBegin;
3587   PetscCall(MatGetSize(inmat, &m, &N));
3588   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3589     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3590     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3591     PetscInt    *bindx, rmax = a->rmax, j;
3592     PetscMPIInt  rank, size;
3593 
3594     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3595     mbs = m / bs;
3596     Nbs = N / cbs;
3597     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3598     nbs = n / cbs;
3599 
3600     PetscCall(PetscMalloc1(rmax, &bindx));
3601     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3602 
3603     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3604     PetscCallMPI(MPI_Comm_size(comm, &size));
3605     if (rank == size - 1) {
3606       /* Check sum(nbs) = Nbs */
3607       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3608     }
3609 
3610     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3611     for (i = 0; i < mbs; i++) {
3612       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3613       nnz = nnz / bs;
3614       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3615       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3616       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3617     }
3618     PetscCall(PetscFree(bindx));
3619 
3620     PetscCall(MatCreate(comm, outmat));
3621     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3622     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3623     PetscCall(MatSetType(*outmat, MATBAIJ));
3624     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3625     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3626     MatPreallocateEnd(dnz, onz);
3627     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3628   }
3629 
3630   /* numeric phase */
3631   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3632   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3633 
3634   for (i = 0; i < m; i++) {
3635     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3636     Ii = i + rstart;
3637     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3638     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3639   }
3640   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3641   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3642   PetscFunctionReturn(PETSC_SUCCESS);
3643 }
3644