1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
10 {
11   Mat B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
15   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
16   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
17   PetscCall(MatDestroy(&B));
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
22 {
23   Mat B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
27   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
28   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to `MATSEQAIJ` when constructed with a single-process communicator,
36    and `MATMPIAIJ` otherwise.  As a result, for single-process communicators,
37    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
38    for communicators controlling multiple processes.  It is recommended that you call both of
39    the above preallocation routines for simplicity; a usage sketch follows this block.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
43 
44   Developer Note:
45    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. This type also
46    automatically switches over to use inodes when enough of them exist.
47 
48   Level: beginner
49 
50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
51 M*/
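/*
   Usage sketch for the recommendation above (illustrative only, not part of the implementation):
   create an n-by-n AIJ matrix and call both preallocation routines.  The per-row nonzero estimates
   (5 diagonal, 2 off-diagonal) and the global size n are placeholders chosen for the example.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));           used on a single-process communicator
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));  used on a multi-process communicator
     ... set values, then MatAssemblyBegin()/MatAssemblyEnd() ...
     PetscCall(MatDestroy(&A));
*/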
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single-process communicator,
57    and `MATMPIAIJCRL` otherwise.  As a result, for single-process communicators,
58    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
59    for communicators controlling multiple processes.  It is recommended that you call both of
60    the above preallocation routines for simplicity; a usage sketch follows this block.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
64 
65   Level: beginner
66 
67 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
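/*
   Usage sketch (illustrative only): the creation sequence from the MATAIJ example above applies
   unchanged; only the type differs, e.g.

     PetscCall(MatSetType(A, MATAIJCRL));

   or, equivalently, pass -mat_type aijcrl on the command line and call MatSetFromOptions() on the matrix.
*/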
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
71 {
72   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
97     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
103 {
104   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
105   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
106   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
107   const PetscInt  *ia, *ib;
108   const MatScalar *aa, *bb, *aav, *bav;
109   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
110   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
111 
112   PetscFunctionBegin;
113   *keptrows = NULL;
114 
115   ia = a->i;
116   ib = b->i;
117   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
118   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
119   for (i = 0; i < m; i++) {
120     na = ia[i + 1] - ia[i];
121     nb = ib[i + 1] - ib[i];
122     if (!na && !nb) {
123       cnt++;
124       goto ok1;
125     }
126     aa = aav + ia[i];
127     for (j = 0; j < na; j++) {
128       if (aa[j] != 0.0) goto ok1;
129     }
130     bb = bav + ib[i];
131     for (j = 0; j < nb; j++) {
132       if (bb[j] != 0.0) goto ok1;
133     }
134     cnt++;
135   ok1:;
136   }
137   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
138   if (!n0rows) {
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
140     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
141     PetscFunctionReturn(0);
142   }
143   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
144   cnt = 0;
145   for (i = 0; i < m; i++) {
146     na = ia[i + 1] - ia[i];
147     nb = ib[i + 1] - ib[i];
148     if (!na && !nb) continue;
149     aa = aav + ia[i];
150     for (j = 0; j < na; j++) {
151       if (aa[j] != 0.0) {
152         rows[cnt++] = rstart + i;
153         goto ok2;
154       }
155     }
156     bb = bav + ib[i];
157     for (j = 0; j < nb; j++) {
158       if (bb[j] != 0.0) {
159         rows[cnt++] = rstart + i;
160         goto ok2;
161       }
162     }
163   ok2:;
164   }
165   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
167   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
172 {
173   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
174   PetscBool   cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y, &cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A, D, is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y, D, is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
189   PetscInt    i, rstart, nrows, *rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
194   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
195   for (i = 0; i < nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
201 {
202   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
203   PetscInt           i, m, n, *garray = aij->garray;
204   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
205   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
206   PetscReal         *work;
207   const PetscScalar *dummy;
208 
209   PetscFunctionBegin;
210   PetscCall(MatGetSize(A, &m, &n));
211   PetscCall(PetscCalloc1(n, &work));
212   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
213   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
214   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
215   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
216   if (type == NORM_2) {
217     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
218     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
219   } else if (type == NORM_1) {
220     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
221     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
222   } else if (type == NORM_INFINITY) {
223     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
224     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
225   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
226     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
227     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
228   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
229     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
230     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
231   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
232   if (type == NORM_INFINITY) {
233     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
234   } else {
235     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
236   }
237   PetscCall(PetscFree(work));
238   if (type == NORM_2) {
239     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
240   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
241     for (i = 0; i < n; i++) reductions[i] /= m;
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
247 {
248   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
249   IS              sis, gis;
250   const PetscInt *isis, *igis;
251   PetscInt        n, *iis, nsis, ngis, rstart, i;
252 
253   PetscFunctionBegin;
254   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
255   PetscCall(MatFindNonzeroRows(a->B, &gis));
256   PetscCall(ISGetSize(gis, &ngis));
257   PetscCall(ISGetSize(sis, &nsis));
258   PetscCall(ISGetIndices(sis, &isis));
259   PetscCall(ISGetIndices(gis, &igis));
260 
261   PetscCall(PetscMalloc1(ngis + nsis, &iis));
262   PetscCall(PetscArraycpy(iis, igis, ngis));
263   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
264   n = ngis + nsis;
265   PetscCall(PetscSortRemoveDupsInt(&n, iis));
266   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
267   for (i = 0; i < n; i++) iis[i] += rstart;
268   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
269 
270   PetscCall(ISRestoreIndices(sis, &isis));
271   PetscCall(ISRestoreIndices(gis, &igis));
272   PetscCall(ISDestroy(&sis));
273   PetscCall(ISDestroy(&gis));
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278   Local utility routine that creates a mapping from the global column
279 number to the local number in the off-diagonal part of the local
280 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at a
281 slightly higher hash-table cost; without it, it is not scalable (each process
282 stores an order-N integer array) but is fast to access.  A lookup sketch follows this routine.
283 */
284 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
285 {
286   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
287   PetscInt    n   = aij->B->cmap->n, i;
288 
289   PetscFunctionBegin;
290   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
291 #if defined(PETSC_USE_CTABLE)
292   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
293   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
294 #else
295   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
296   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
297 #endif
298   PetscFunctionReturn(0);
299 }
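/*
   Lookup sketch for the colmap built above (illustrative; it mirrors the code in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below).  Given a global column id gcol, it yields the local column lcol of
   the off-diagonal block B, with lcol < 0 meaning "not present"; values are stored shifted by one so
   that a lookup result of 0 can denote absence.

   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/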
300 
301 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
302   { \
303     if (col <= lastcol1) low1 = 0; \
304     else high1 = nrow1; \
305     lastcol1 = col; \
306     while (high1 - low1 > 5) { \
307       t = (low1 + high1) / 2; \
308       if (rp1[t] > col) high1 = t; \
309       else low1 = t; \
310     } \
311     for (_i = low1; _i < high1; _i++) { \
312       if (rp1[_i] > col) break; \
313       if (rp1[_i] == col) { \
314         if (addv == ADD_VALUES) { \
315           ap1[_i] += value; \
316           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
317           (void)PetscLogFlops(1.0); \
318         } else ap1[_i] = value; \
319         goto a_noinsert; \
320       } \
321     } \
322     if (value == 0.0 && ignorezeroentries && row != col) { \
323       low1  = 0; \
324       high1 = nrow1; \
325       goto a_noinsert; \
326     } \
327     if (nonew == 1) { \
328       low1  = 0; \
329       high1 = nrow1; \
330       goto a_noinsert; \
331     } \
332     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
333     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
334     N = nrow1++ - 1; \
335     a->nz++; \
336     high1++; \
337     /* shift up all the later entries in this row */ \
338     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
339     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
340     rp1[_i] = col; \
341     ap1[_i] = value; \
342     A->nonzerostate++; \
343   a_noinsert:; \
344     ailen[row] = nrow1; \
345   }
346 
347 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
348   { \
349     if (col <= lastcol2) low2 = 0; \
350     else high2 = nrow2; \
351     lastcol2 = col; \
352     while (high2 - low2 > 5) { \
353       t = (low2 + high2) / 2; \
354       if (rp2[t] > col) high2 = t; \
355       else low2 = t; \
356     } \
357     for (_i = low2; _i < high2; _i++) { \
358       if (rp2[_i] > col) break; \
359       if (rp2[_i] == col) { \
360         if (addv == ADD_VALUES) { \
361           ap2[_i] += value; \
362           (void)PetscLogFlops(1.0); \
363         } else ap2[_i] = value; \
364         goto b_noinsert; \
365       } \
366     } \
367     if (value == 0.0 && ignorezeroentries) { \
368       low2  = 0; \
369       high2 = nrow2; \
370       goto b_noinsert; \
371     } \
372     if (nonew == 1) { \
373       low2  = 0; \
374       high2 = nrow2; \
375       goto b_noinsert; \
376     } \
377     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
378     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
379     N = nrow2++ - 1; \
380     b->nz++; \
381     high2++; \
382     /* shift up all the later entries in this row */ \
383     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
384     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
385     rp2[_i] = col; \
386     ap2[_i] = value; \
387     B->nonzerostate++; \
388   b_noinsert:; \
389     bilen[row] = nrow2; \
390   }
391 
392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
393 {
394   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
395   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
396   PetscInt     l, *garray                         = mat->garray, diag;
397   PetscScalar *aa, *ba;
398 
399   PetscFunctionBegin;
400   /* code only works for square matrices A */
401 
402   /* find size of row to the left of the diagonal part */
403   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
404   row = row - diag;
405   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
406     if (garray[b->j[b->i[row] + l]] > diag) break;
407   }
408   if (l) {
409     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
410     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
411     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
412   }
413 
414   /* diagonal part */
415   if (a->i[row + 1] - a->i[row]) {
416     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
417     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
418     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
419   }
420 
421   /* right of diagonal part */
422   if (b->i[row + 1] - b->i[row] - l) {
423     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
424     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
425     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
431 {
432   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
433   PetscScalar value = 0.0;
434   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
435   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
436   PetscBool   roworiented = aij->roworiented;
437 
438   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
439   Mat         A     = aij->A;
440   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
441   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
442   PetscBool   ignorezeroentries = a->ignorezeroentries;
443   Mat         B                 = aij->B;
444   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
445   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
446   MatScalar  *aa, *ba;
447   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
448   PetscInt    nonew;
449   MatScalar  *ap1, *ap2;
450 
451   PetscFunctionBegin;
452   PetscCall(MatSeqAIJGetArray(A, &aa));
453   PetscCall(MatSeqAIJGetArray(B, &ba));
454   for (i = 0; i < m; i++) {
455     if (im[i] < 0) continue;
456     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j = 0; j < n; j++) {
475         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
481         } else if (in[j] < 0) {
482           continue;
483         } else {
484           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
485           if (mat->was_assembled) {
486             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
487 #if defined(PETSC_USE_CTABLE)
488             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
489             col--;
490 #else
491             col = aij->colmap[in[j]] - 1;
492 #endif
493             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
494               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
495               col = in[j];
496               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
497               B     = aij->B;
498               b     = (Mat_SeqAIJ *)B->data;
499               bimax = b->imax;
500               bi    = b->i;
501               bilen = b->ilen;
502               bj    = b->j;
503               ba    = b->a;
504               rp2   = bj + bi[row];
505               ap2   = ba + bi[row];
506               rmax2 = bimax[row];
507               nrow2 = bilen[row];
508               low2  = 0;
509               high2 = nrow2;
510               bm    = aij->B->rmap->n;
511               ba    = b->a;
512             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
513               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
514                 PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
515               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
516             }
517           } else col = in[j];
518           nonew = b->nonew;
519           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
520         }
521       }
522     } else {
523       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
524       if (!aij->donotstash) {
525         mat->assembled = PETSC_FALSE;
526         if (roworiented) {
527           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
528         } else {
529           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
530         }
531       }
532     }
533   }
534   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa and ba might have been freed due to reallocation above, but we do not access them here */
535   PetscCall(MatSeqAIJRestoreArray(B, &ba));
536   PetscFunctionReturn(0);
537 }
538 
539 /*
540     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
541     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
542     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
543 */
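/*
   CSR input illustration (not part of the implementation): for two local rows whose global nonzero
   columns are {0, 2} and {1}, one would pass mat_i = {0, 2, 3} and mat_j = {0, 2, 1}; mat_i[j]..mat_i[j+1]
   delimits row j and the column indices within each row are sorted.
*/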
544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
545 {
546   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
547   Mat         A      = aij->A; /* diagonal part of the matrix */
548   Mat         B      = aij->B; /* offdiagonal part of the matrix */
549   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
550   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
551   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
552   PetscInt   *ailen = a->ilen, *aj = a->j;
553   PetscInt   *bilen = b->ilen, *bj = b->j;
554   PetscInt    am          = aij->A->rmap->n, j;
555   PetscInt    diag_so_far = 0, dnz;
556   PetscInt    offd_so_far = 0, onz;
557 
558   PetscFunctionBegin;
559   /* Iterate over all rows of the matrix */
560   for (j = 0; j < am; j++) {
561     dnz = onz = 0;
562     /*  Iterate over all non-zero columns of the current row */
563     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
564       /* If column is in the diagonal */
565       if (mat_j[col] >= cstart && mat_j[col] < cend) {
566         aj[diag_so_far++] = mat_j[col] - cstart;
567         dnz++;
568       } else { /* off-diagonal entries */
569         bj[offd_so_far++] = mat_j[col];
570         onz++;
571       }
572     }
573     ailen[j] = dnz;
574     bilen[j] = onz;
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 /*
580     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
581     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
582     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
583     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
584     would not be valid and the more complex MatSetValues_MPIAIJ has to be used.
585 */
586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
587 {
588   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
589   Mat          A    = aij->A; /* diagonal part of the matrix */
590   Mat          B    = aij->B; /* offdiagonal part of the matrix */
591   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
592   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
593   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
594   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
595   PetscInt    *ailen = a->ilen, *aj = a->j;
596   PetscInt    *bilen = b->ilen, *bj = b->j;
597   PetscInt     am          = aij->A->rmap->n, j;
598   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
599   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
600   PetscScalar *aa = a->a, *ba = b->a;
601 
602   PetscFunctionBegin;
603   /* Iterate over all rows of the matrix */
604   for (j = 0; j < am; j++) {
605     dnz_row = onz_row = 0;
606     rowstart_offd     = full_offd_i[j];
607     rowstart_diag     = full_diag_i[j];
608     /*  Iterate over all non-zero columns of the current row */
609     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
610       /* If column is in the diagonal */
611       if (mat_j[col] >= cstart && mat_j[col] < cend) {
612         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
613         aa[rowstart_diag + dnz_row] = mat_a[col];
614         dnz_row++;
615       } else { /* off-diagonal entries */
616         bj[rowstart_offd + onz_row] = mat_j[col];
617         ba[rowstart_offd + onz_row] = mat_a[col];
618         onz_row++;
619       }
620     }
621     ailen[j] = dnz_row;
622     bilen[j] = onz_row;
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
628 {
629   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
630   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
632 
633   PetscFunctionBegin;
634   for (i = 0; i < m; i++) {
635     if (idxm[i] < 0) continue; /* negative row */
636     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
637     if (idxm[i] >= rstart && idxm[i] < rend) {
638       row = idxm[i] - rstart;
639       for (j = 0; j < n; j++) {
640         if (idxn[j] < 0) continue; /* negative column */
641         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
642         if (idxn[j] >= cstart && idxn[j] < cend) {
643           col = idxn[j] - cstart;
644           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
645         } else {
646           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
647 #if defined(PETSC_USE_CTABLE)
648           PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
649           col--;
650 #else
651           col = aij->colmap[idxn[j]] - 1;
652 #endif
653           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
654           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
655         }
656       }
657     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
663 {
664   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
665   PetscInt    nstash, reallocs;
666 
667   PetscFunctionBegin;
668   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
669 
670   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
671   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
672   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
673   PetscFunctionReturn(0);
674 }
675 
676 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
677 {
678   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
679   PetscMPIInt  n;
680   PetscInt     i, j, rstart, ncols, flg;
681   PetscInt    *row, *col;
682   PetscBool    other_disassembled;
683   PetscScalar *val;
684 
685   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
686 
687   PetscFunctionBegin;
688   if (!aij->donotstash && !mat->nooffprocentries) {
689     while (1) {
690       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
691       if (!flg) break;
692 
693       for (i = 0; i < n;) {
694         /* Now identify the consecutive vals belonging to the same row */
695         for (j = i, rstart = row[j]; j < n; j++) {
696           if (row[j] != rstart) break;
697         }
698         if (j < n) ncols = j - i;
699         else ncols = n - i;
700         /* Now assemble all these values with a single function call */
701         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
702         i = j;
703       }
704     }
705     PetscCall(MatStashScatterEnd_Private(&mat->stash));
706   }
707 #if defined(PETSC_HAVE_DEVICE)
708   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
709   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
710   if (mat->boundtocpu) {
711     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
712     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
713   }
714 #endif
715   PetscCall(MatAssemblyBegin(aij->A, mode));
716   PetscCall(MatAssemblyEnd(aij->A, mode));
717 
718   /* determine if any processor has disassembled; if so, we must
719      also disassemble ourselves, in order that we may reassemble. */
720   /*
721      if the nonzero structure of submatrix B cannot change then we know that
722      no processor disassembled, thus we can skip this step
723   */
724   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
725     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
726     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
727       PetscCall(MatDisAssemble_MPIAIJ(mat));
728     }
729   }
730   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
731   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
732 #if defined(PETSC_HAVE_DEVICE)
733   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
734 #endif
735   PetscCall(MatAssemblyBegin(aij->B, mode));
736   PetscCall(MatAssemblyEnd(aij->B, mode));
737 
738   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
739 
740   aij->rowvalues = NULL;
741 
742   PetscCall(VecDestroy(&aij->diag));
743 
744   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
745   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
746     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
747     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
748   }
749 #if defined(PETSC_HAVE_DEVICE)
750   mat->offloadmask = PETSC_OFFLOAD_BOTH;
751 #endif
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
756 {
757   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
758 
759   PetscFunctionBegin;
760   PetscCall(MatZeroEntries(l->A));
761   PetscCall(MatZeroEntries(l->B));
762   PetscFunctionReturn(0);
763 }
764 
765 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
766 {
767   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
768   PetscObjectState sA, sB;
769   PetscInt        *lrows;
770   PetscInt         r, len;
771   PetscBool        cong, lch, gch;
772 
773   PetscFunctionBegin;
774   /* get locally owned rows */
775   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
776   PetscCall(MatHasCongruentLayouts(A, &cong));
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
783     PetscCall(VecGetArrayRead(x, &xx));
784     PetscCall(VecGetArray(b, &bb));
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
786     PetscCall(VecRestoreArrayRead(x, &xx));
787     PetscCall(VecRestoreArray(b, &bb));
788   }
789 
790   sA = mat->A->nonzerostate;
791   sB = mat->B->nonzerostate;
792 
793   if (diag != 0.0 && cong) {
794     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
795     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
796   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
797     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
798     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
799     PetscInt    nnwA, nnwB;
800     PetscBool   nnzA, nnzB;
801 
802     nnwA = aijA->nonew;
803     nnwB = aijB->nonew;
804     nnzA = aijA->keepnonzeropattern;
805     nnzB = aijB->keepnonzeropattern;
806     if (!nnzA) {
807       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
808       aijA->nonew = 0;
809     }
810     if (!nnzB) {
811       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
812       aijB->nonew = 0;
813     }
814     /* Must zero here before the next loop */
815     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
816     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       if (row >= A->cmap->N) continue;
820       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
821     }
822     aijA->nonew = nnwA;
823     aijB->nonew = nnwB;
824   } else {
825     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
826     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
827   }
828   PetscCall(PetscFree(lrows));
829   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
830   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
831 
832   /* reduce nonzerostate */
833   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
834   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
835   if (gch) A->nonzerostate++;
836   PetscFunctionReturn(0);
837 }
838 
839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
840 {
841   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
842   PetscMPIInt        n = A->rmap->n;
843   PetscInt           i, j, r, m, len = 0;
844   PetscInt          *lrows, *owners = A->rmap->range;
845   PetscMPIInt        p = 0;
846   PetscSFNode       *rrows;
847   PetscSF            sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb, *mask, *aij_a;
850   Vec                xmask, lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
852   const PetscInt    *aj, *ii, *ridx;
853   PetscScalar       *aa;
854 
855   PetscFunctionBegin;
856   /* Create SF where leaves are input rows and roots are owned rows */
857   PetscCall(PetscMalloc1(n, &lrows));
858   for (r = 0; r < n; ++r) lrows[r] = -1;
859   PetscCall(PetscMalloc1(N, &rrows));
860   for (r = 0; r < N; ++r) {
861     const PetscInt idx = rows[r];
862     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
863     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
864       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
865     }
866     rrows[r].rank  = p;
867     rrows[r].index = rows[r] - owners[p];
868   }
869   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
870   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
871   /* Collect flags for rows to be zeroed */
872   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
873   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
874   PetscCall(PetscSFDestroy(&sf));
875   /* Compress and put in row numbers */
876   for (r = 0; r < n; ++r)
877     if (lrows[r] >= 0) lrows[len++] = r;
878   /* zero diagonal part of matrix */
879   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
880   /* handle off diagonal part of matrix */
881   PetscCall(MatCreateVecs(A, &xmask, NULL));
882   PetscCall(VecDuplicate(l->lvec, &lmask));
883   PetscCall(VecGetArray(xmask, &bb));
884   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
885   PetscCall(VecRestoreArray(xmask, &bb));
886   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
887   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
888   PetscCall(VecDestroy(&xmask));
889   if (x && b) { /* this code is buggy when the row and column layouts don't match */
890     PetscBool cong;
891 
892     PetscCall(MatHasCongruentLayouts(A, &cong));
893     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
894     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
895     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
896     PetscCall(VecGetArrayRead(l->lvec, &xx));
897     PetscCall(VecGetArray(b, &bb));
898   }
899   PetscCall(VecGetArray(lmask, &mask));
900   /* remove zeroed rows of off diagonal matrix */
901   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
902   ii = aij->i;
903   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
904   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
905   if (aij->compressedrow.use) {
906     m    = aij->compressedrow.nrows;
907     ii   = aij->compressedrow.i;
908     ridx = aij->compressedrow.rindex;
909     for (i = 0; i < m; i++) {
910       n  = ii[i + 1] - ii[i];
911       aj = aij->j + ii[i];
912       aa = aij_a + ii[i];
913 
914       for (j = 0; j < n; j++) {
915         if (PetscAbsScalar(mask[*aj])) {
916           if (b) bb[*ridx] -= *aa * xx[*aj];
917           *aa = 0.0;
918         }
919         aa++;
920         aj++;
921       }
922       ridx++;
923     }
924   } else { /* do not use compressed row format */
925     m = l->B->rmap->n;
926     for (i = 0; i < m; i++) {
927       n  = ii[i + 1] - ii[i];
928       aj = aij->j + ii[i];
929       aa = aij_a + ii[i];
930       for (j = 0; j < n; j++) {
931         if (PetscAbsScalar(mask[*aj])) {
932           if (b) bb[i] -= *aa * xx[*aj];
933           *aa = 0.0;
934         }
935         aa++;
936         aj++;
937       }
938     }
939   }
940   if (x && b) {
941     PetscCall(VecRestoreArray(b, &bb));
942     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
943   }
944   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
945   PetscCall(VecRestoreArray(lmask, &mask));
946   PetscCall(VecDestroy(&lmask));
947   PetscCall(PetscFree(lrows));
948 
949   /* only change matrix nonzero state if pattern was allowed to be changed */
950   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
951     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
952     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
953   }
954   PetscFunctionReturn(0);
955 }
956 
957 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
958 {
959   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
960   PetscInt    nt;
961   VecScatter  Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   PetscCall(VecGetLocalSize(xx, &nt));
965   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscUseTypeMethod(a->A, mult, xx, yy);
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
976 
977   PetscFunctionBegin;
978   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
983 {
984   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
985   VecScatter  Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
989   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
990   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
991   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
998 
999   PetscFunctionBegin;
1000   /* do nondiagonal part */
1001   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1002   /* do local part */
1003   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1004   /* add partial results together */
1005   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1006   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1011 {
1012   MPI_Comm    comm;
1013   Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
1014   Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
1015   IS          Me, Notme;
1016   PetscInt    M, N, first, last, *notme, i;
1017   PetscBool   lf;
1018   PetscMPIInt size;
1019 
1020   PetscFunctionBegin;
1021   /* Easy test: symmetric diagonal block */
1022   Bij  = (Mat_MPIAIJ *)Bmat->data;
1023   Bdia = Bij->A;
1024   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1025   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1026   if (!*f) PetscFunctionReturn(0);
1027   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1028   PetscCallMPI(MPI_Comm_size(comm, &size));
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1032   PetscCall(MatGetSize(Amat, &M, &N));
1033   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1034   PetscCall(PetscMalloc1(N - last + first, &notme));
1035   for (i = 0; i < first; i++) notme[i] = i;
1036   for (i = last; i < M; i++) notme[i - last + first] = i;
1037   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1038   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1039   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1040   Aoff = Aoffs[0];
1041   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1042   Boff = Boffs[0];
1043   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1044   PetscCall(MatDestroyMatrices(1, &Aoffs));
1045   PetscCall(MatDestroyMatrices(1, &Boffs));
1046   PetscCall(ISDestroy(&Me));
1047   PetscCall(ISDestroy(&Notme));
1048   PetscCall(PetscFree(notme));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1053 {
1054   PetscFunctionBegin;
1055   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1060 {
1061   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1066   /* do local part */
1067   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1068   /* add partial results together */
1069   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1070   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075   This only works correctly for square matrices where the subblock A->A is the
1076    diagonal block
1077 */
1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1079 {
1080   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1081 
1082   PetscFunctionBegin;
1083   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1084   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1085   PetscCall(MatGetDiagonal(a->A, v));
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatScale(a->A, aa));
1095   PetscCall(MatScale(a->B, aa));
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1100 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1101 {
1102   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(PetscSFDestroy(&aij->coo_sf));
1106   PetscCall(PetscFree(aij->Aperm1));
1107   PetscCall(PetscFree(aij->Bperm1));
1108   PetscCall(PetscFree(aij->Ajmap1));
1109   PetscCall(PetscFree(aij->Bjmap1));
1110 
1111   PetscCall(PetscFree(aij->Aimap2));
1112   PetscCall(PetscFree(aij->Bimap2));
1113   PetscCall(PetscFree(aij->Aperm2));
1114   PetscCall(PetscFree(aij->Bperm2));
1115   PetscCall(PetscFree(aij->Ajmap2));
1116   PetscCall(PetscFree(aij->Bjmap2));
1117 
1118   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1119   PetscCall(PetscFree(aij->Cperm1));
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1124 {
1125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1126 
1127   PetscFunctionBegin;
1128 #if defined(PETSC_USE_LOG)
1129   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1130 #endif
1131   PetscCall(MatStashDestroy_Private(&mat->stash));
1132   PetscCall(VecDestroy(&aij->diag));
1133   PetscCall(MatDestroy(&aij->A));
1134   PetscCall(MatDestroy(&aij->B));
1135 #if defined(PETSC_USE_CTABLE)
1136   PetscCall(PetscHMapIDestroy(&aij->colmap));
1137 #else
1138   PetscCall(PetscFree(aij->colmap));
1139 #endif
1140   PetscCall(PetscFree(aij->garray));
1141   PetscCall(VecDestroy(&aij->lvec));
1142   PetscCall(VecScatterDestroy(&aij->Mvctx));
1143   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1144   PetscCall(PetscFree(aij->ld));
1145 
1146   /* Free COO */
1147   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1148 
1149   PetscCall(PetscFree(mat->data));
1150 
1151   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1152   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1153 
1154   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1164 #if defined(PETSC_HAVE_CUDA)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_HIP)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
1169 #endif
1170 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1172 #endif
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1174 #if defined(PETSC_HAVE_ELEMENTAL)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1176 #endif
1177 #if defined(PETSC_HAVE_SCALAPACK)
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1179 #endif
1180 #if defined(PETSC_HAVE_HYPRE)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1183 #endif
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1190 #if defined(PETSC_HAVE_MKL_SPARSE)
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1192 #endif
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1202 {
1203   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1204   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1205   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1206   const PetscInt    *garray = aij->garray;
1207   const PetscScalar *aa, *ba;
1208   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1209   PetscInt          *rowlens;
1210   PetscInt          *colidxs;
1211   PetscScalar       *matvals;
1212 
1213   PetscFunctionBegin;
1214   PetscCall(PetscViewerSetUp(viewer));
1215 
1216   M  = mat->rmap->N;
1217   N  = mat->cmap->N;
1218   m  = mat->rmap->n;
1219   rs = mat->rmap->rstart;
1220   cs = mat->cmap->rstart;
1221   nz = A->nz + B->nz;
1222 
1223   /* write matrix header */
1224   header[0] = MAT_FILE_CLASSID;
1225   header[1] = M;
1226   header[2] = N;
1227   header[3] = nz;
1228   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1229   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1230 
1231   /* fill in and store row lengths  */
1232   PetscCall(PetscMalloc1(m, &rowlens));
1233   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1235   PetscCall(PetscFree(rowlens));
1236 
1237   /* fill in and store column indices */
1238   PetscCall(PetscMalloc1(nz, &colidxs));
1239   for (cnt = 0, i = 0; i < m; i++) {
1240     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       colidxs[cnt++] = garray[B->j[jb]];
1243     }
1244     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1245     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1246   }
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1249   PetscCall(PetscFree(colidxs));
1250 
1251   /* fill in and store nonzero values */
1252   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1253   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1254   PetscCall(PetscMalloc1(nz, &matvals));
1255   for (cnt = 0, i = 0; i < m; i++) {
1256     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1257       if (garray[B->j[jb]] > cs) break;
1258       matvals[cnt++] = ba[jb];
1259     }
1260     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1261     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1262   }
1263   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1264   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1265   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1266   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1267   PetscCall(PetscFree(matvals));
1268 
1269   /* write block size option to the viewer's .info file */
1270   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #include <petscdraw.h>
1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1276 {
1277   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1278   PetscMPIInt       rank = aij->rank, size = aij->size;
1279   PetscBool         isdraw, iascii, isbinary;
1280   PetscViewer       sviewer;
1281   PetscViewerFormat format;
1282 
1283   PetscFunctionBegin;
1284   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1285   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1286   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1287   if (iascii) {
1288     PetscCall(PetscViewerGetFormat(viewer, &format));
1289     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1290       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1291       PetscCall(PetscMalloc1(size, &nz));
1292       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1293       for (i = 0; i < (PetscInt)size; i++) {
1294         nmax = PetscMax(nmax, nz[i]);
1295         nmin = PetscMin(nmin, nz[i]);
1296         navg += nz[i];
1297       }
1298       PetscCall(PetscFree(nz));
1299       navg = navg / size;
1300       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1301       PetscFunctionReturn(0);
1302     }
1303     PetscCall(PetscViewerGetFormat(viewer, &format));
1304     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1305       MatInfo   info;
1306       PetscInt *inodes = NULL;
1307 
1308       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1309       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1310       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1311       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1312       if (!inodes) {
1313         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1314                                                      (double)info.memory));
1315       } else {
1316         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1317                                                      (double)info.memory));
1318       }
1319       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1320       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1321       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1322       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1323       PetscCall(PetscViewerFlush(viewer));
1324       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1325       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1326       PetscCall(VecScatterView(aij->Mvctx, viewer));
1327       PetscFunctionReturn(0);
1328     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1329       PetscInt inodecount, inodelimit, *inodes;
1330       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1331       if (inodes) {
1332         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1333       } else {
1334         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1335       }
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1338       PetscFunctionReturn(0);
1339     }
1340   } else if (isbinary) {
1341     if (size == 1) {
1342       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1343       PetscCall(MatView(aij->A, viewer));
1344     } else {
1345       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1346     }
1347     PetscFunctionReturn(0);
1348   } else if (iascii && size == 1) {
1349     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1350     PetscCall(MatView(aij->A, viewer));
1351     PetscFunctionReturn(0);
1352   } else if (isdraw) {
1353     PetscDraw draw;
1354     PetscBool isnull;
1355     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1356     PetscCall(PetscDrawIsNull(draw, &isnull));
1357     if (isnull) PetscFunctionReturn(0);
1358   }
1359 
1360   { /* assemble the entire matrix onto first processor */
1361     Mat A = NULL, Av;
1362     IS  isrow, iscol;
1363 
1364     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1365     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1366     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1367     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1368     /*  The commented code uses MatCreateSubMatrices instead */
1369     /*
1370     Mat *AA, A = NULL, Av;
1371     IS  isrow,iscol;
1372 
1373     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1374     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1375     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1376     if (rank == 0) {
1377        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1378        A    = AA[0];
1379        Av   = AA[0];
1380     }
1381     PetscCall(MatDestroySubMatrices(1,&AA));
1382 */
1383     PetscCall(ISDestroy(&iscol));
1384     PetscCall(ISDestroy(&isrow));
1385     /*
1386        Everyone has to call to draw the matrix since the graphics waits are
1387        synchronized across all processors that share the PetscDraw object
1388     */
1389     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1390     if (rank == 0) {
1391       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1392       PetscCall(MatView_SeqAIJ(Av, sviewer));
1393     }
1394     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1395     PetscCall(PetscViewerFlush(viewer));
1396     PetscCall(MatDestroy(&A));
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1402 {
1403   PetscBool iascii, isdraw, issocket, isbinary;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1407   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1408   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1409   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1410   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1415 {
1416   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1417   Vec         bb1 = NULL;
1418   PetscBool   hasop;
1419 
1420   PetscFunctionBegin;
1421   if (flag == SOR_APPLY_UPPER) {
1422     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1423     PetscFunctionReturn(0);
1424   }
1425 
1426   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1427 
1428   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1429     if (flag & SOR_ZERO_INITIAL_GUESS) {
1430       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431       its--;
1432     }
1433 
1434     while (its--) {
1435       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437 
1438       /* update rhs: bb1 = bb - B*x */
1439       PetscCall(VecScale(mat->lvec, -1.0));
1440       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1441 
1442       /* local sweep */
1443       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1444     }
1445   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1446     if (flag & SOR_ZERO_INITIAL_GUESS) {
1447       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1448       its--;
1449     }
1450     while (its--) {
1451       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453 
1454       /* update rhs: bb1 = bb - B*x */
1455       PetscCall(VecScale(mat->lvec, -1.0));
1456       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1457 
1458       /* local sweep */
1459       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1460     }
1461   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1462     if (flag & SOR_ZERO_INITIAL_GUESS) {
1463       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1464       its--;
1465     }
1466     while (its--) {
1467       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1468       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1469 
1470       /* update rhs: bb1 = bb - B*x */
1471       PetscCall(VecScale(mat->lvec, -1.0));
1472       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1473 
1474       /* local sweep */
1475       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1476     }
1477   } else if (flag & SOR_EISENSTAT) {
1478     Vec xx1;
1479 
1480     PetscCall(VecDuplicate(bb, &xx1));
1481     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1482 
1483     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1484     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1485     if (!mat->diag) {
1486       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1487       PetscCall(MatGetDiagonal(matin, mat->diag));
1488     }
1489     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1490     if (hasop) {
1491       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1492     } else {
1493       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1494     }
1495     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1496 
1497     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1498 
1499     /* local sweep */
1500     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1501     PetscCall(VecAXPY(xx, 1.0, xx1));
1502     PetscCall(VecDestroy(&xx1));
1503   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1504 
1505   PetscCall(VecDestroy(&bb1));
1506 
1507   matin->factorerrortype = mat->A->factorerrortype;
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1512 {
1513   Mat             aA, aB, Aperm;
1514   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1515   PetscScalar    *aa, *ba;
1516   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1517   PetscSF         rowsf, sf;
1518   IS              parcolp = NULL;
1519   PetscBool       done;
1520 
1521   PetscFunctionBegin;
1522   PetscCall(MatGetLocalSize(A, &m, &n));
1523   PetscCall(ISGetIndices(rowp, &rwant));
1524   PetscCall(ISGetIndices(colp, &cwant));
1525   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1526 
1527   /* Invert row permutation to find out where my rows should go */
1528   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1529   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1530   PetscCall(PetscSFSetFromOptions(rowsf));
1531   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1532   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1533   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1534 
1535   /* Invert column permutation to find out where my columns should go */
1536   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1537   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1538   PetscCall(PetscSFSetFromOptions(sf));
1539   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1540   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1541   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(ISRestoreIndices(rowp, &rwant));
1545   PetscCall(ISRestoreIndices(colp, &cwant));
1546   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1547 
1548   /* Find out where my gcols should go */
1549   PetscCall(MatGetSize(aB, NULL, &ng));
1550   PetscCall(PetscMalloc1(ng, &gcdest));
1551   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1552   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1553   PetscCall(PetscSFSetFromOptions(sf));
1554   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1555   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1556   PetscCall(PetscSFDestroy(&sf));
1557 
1558   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1559   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1560   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1561   for (i = 0; i < m; i++) {
1562     PetscInt    row = rdest[i];
1563     PetscMPIInt rowner;
1564     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1565     for (j = ai[i]; j < ai[i + 1]; j++) {
1566       PetscInt    col = cdest[aj[j]];
1567       PetscMPIInt cowner;
1568       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1569       if (rowner == cowner) dnnz[i]++;
1570       else onnz[i]++;
1571     }
1572     for (j = bi[i]; j < bi[i + 1]; j++) {
1573       PetscInt    col = gcdest[bj[j]];
1574       PetscMPIInt cowner;
1575       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1576       if (rowner == cowner) dnnz[i]++;
1577       else onnz[i]++;
1578     }
1579   }
1580   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1581   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1582   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1583   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1584   PetscCall(PetscSFDestroy(&rowsf));
1585 
1586   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1587   PetscCall(MatSeqAIJGetArray(aA, &aa));
1588   PetscCall(MatSeqAIJGetArray(aB, &ba));
1589   for (i = 0; i < m; i++) {
1590     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1591     PetscInt  j0, rowlen;
1592     rowlen = ai[i + 1] - ai[i];
1593     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the values in batches */
1594       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1595       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1596     }
1597     rowlen = bi[i + 1] - bi[i];
1598     for (j0 = j = 0; j < rowlen; j0 = j) {
1599       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1600       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1601     }
1602   }
1603   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1604   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1605   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1606   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1607   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1608   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1609   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1610   PetscCall(PetscFree3(work, rdest, cdest));
1611   PetscCall(PetscFree(gcdest));
1612   if (parcolp) PetscCall(ISDestroy(&colp));
1613   *B = Aperm;
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1618 {
1619   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1620 
1621   PetscFunctionBegin;
1622   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1623   if (ghosts) *ghosts = aij->garray;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1628 {
1629   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1630   Mat            A = mat->A, B = mat->B;
1631   PetscLogDouble isend[5], irecv[5];
1632 
1633   PetscFunctionBegin;
1634   info->block_size = 1.0;
1635   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1636 
1637   isend[0] = info->nz_used;
1638   isend[1] = info->nz_allocated;
1639   isend[2] = info->nz_unneeded;
1640   isend[3] = info->memory;
1641   isend[4] = info->mallocs;
1642 
1643   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1644 
1645   isend[0] += info->nz_used;
1646   isend[1] += info->nz_allocated;
1647   isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;
1649   isend[4] += info->mallocs;
1650   if (flag == MAT_LOCAL) {
1651     info->nz_used      = isend[0];
1652     info->nz_allocated = isend[1];
1653     info->nz_unneeded  = isend[2];
1654     info->memory       = isend[3];
1655     info->mallocs      = isend[4];
1656   } else if (flag == MAT_GLOBAL_MAX) {
1657     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   } else if (flag == MAT_GLOBAL_SUM) {
1665     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   }
1673   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1674   info->fill_ratio_needed = 0;
1675   info->factor_mallocs    = 0;
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1680 {
1681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692   case MAT_FORM_EXPLICIT_TRANSPOSE:
1693     MatCheckPreallocated(A, 1);
1694     PetscCall(MatSetOption(a->A, op, flg));
1695     PetscCall(MatSetOption(a->B, op, flg));
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A, 1);
1699     a->roworiented = flg;
1700 
1701     PetscCall(MatSetOption(a->A, op, flg));
1702     PetscCall(MatSetOption(a->B, op, flg));
1703     break;
1704   case MAT_FORCE_DIAGONAL_ENTRIES:
1705   case MAT_SORTED_FULL:
1706     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1712   case MAT_SPD:
1713   case MAT_SYMMETRIC:
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715   case MAT_HERMITIAN:
1716   case MAT_SYMMETRY_ETERNAL:
1717   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1718   case MAT_SPD_ETERNAL:
1719     /* if the diagonal matrix is square it inherits some of the properties above */
1720     break;
1721   case MAT_SUBMAT_SINGLEIS:
1722     A->submat_singleis = flg;
1723     break;
1724   case MAT_STRUCTURE_ONLY:
1725     /* The option is handled directly by MatSetOption() */
1726     break;
1727   default:
1728     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1734 {
1735   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1736   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1737   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1738   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1739   PetscInt    *cmap, *idx_p;
1740 
1741   PetscFunctionBegin;
1742   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1743   mat->getrowactive = PETSC_TRUE;
1744 
1745   if (!mat->rowvalues && (idx || v)) {
1746     /*
1747         allocate enough space to hold information from the longest row.
1748     */
1749     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1750     PetscInt    max = 1, tmp;
1751     for (i = 0; i < matin->rmap->n; i++) {
1752       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1753       if (max < tmp) max = tmp;
1754     }
1755     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1756   }
1757 
1758   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1759   lrow = row - rstart;
1760 
1761   pvA = &vworkA;
1762   pcA = &cworkA;
1763   pvB = &vworkB;
1764   pcB = &cworkB;
1765   if (!v) {
1766     pvA = NULL;
1767     pvB = NULL;
1768   }
1769   if (!idx) {
1770     pcA = NULL;
1771     if (!v) pcB = NULL;
1772   }
1773   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1774   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1775   nztot = nzA + nzB;
1776 
1777   cmap = mat->garray;
1778   if (v || idx) {
1779     if (nztot) {
1780       /* Sort by increasing column numbers, assuming A and B already sorted */
1781       PetscInt imark = -1;
1782       if (v) {
1783         *v = v_p = mat->rowvalues;
1784         for (i = 0; i < nzB; i++) {
1785           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1786           else break;
1787         }
1788         imark = i;
1789         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1790         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1791       }
1792       if (idx) {
1793         *idx = idx_p = mat->rowindices;
1794         if (imark > -1) {
1795           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1796         } else {
1797           for (i = 0; i < nzB; i++) {
1798             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1799             else break;
1800           }
1801           imark = i;
1802         }
1803         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1804         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1805       }
1806     } else {
1807       if (idx) *idx = NULL;
1808       if (v) *v = NULL;
1809     }
1810   }
1811   *nz = nztot;
1812   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1813   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1818 {
1819   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1820 
1821   PetscFunctionBegin;
1822   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1823   aij->getrowactive = PETSC_FALSE;
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1828 {
1829   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1830   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1831   PetscInt         i, j, cstart = mat->cmap->rstart;
1832   PetscReal        sum = 0.0;
1833   const MatScalar *v, *amata, *bmata;
1834 
1835   PetscFunctionBegin;
1836   if (aij->size == 1) {
1837     PetscCall(MatNorm(aij->A, type, norm));
1838   } else {
1839     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1840     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1841     if (type == NORM_FROBENIUS) {
1842       v = amata;
1843       for (i = 0; i < amat->nz; i++) {
1844         sum += PetscRealPart(PetscConj(*v) * (*v));
1845         v++;
1846       }
1847       v = bmata;
1848       for (i = 0; i < bmat->nz; i++) {
1849         sum += PetscRealPart(PetscConj(*v) * (*v));
1850         v++;
1851       }
1852       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       *norm = PetscSqrtReal(*norm);
1854       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1855     } else if (type == NORM_1) { /* max column norm */
1856       PetscReal *tmp, *tmp2;
1857       PetscInt  *jj, *garray = aij->garray;
1858       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1859       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1860       *norm = 0.0;
1861       v     = amata;
1862       jj    = amat->j;
1863       for (j = 0; j < amat->nz; j++) {
1864         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1865         v++;
1866       }
1867       v  = bmata;
1868       jj = bmat->j;
1869       for (j = 0; j < bmat->nz; j++) {
1870         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1871         v++;
1872       }
1873       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1874       for (j = 0; j < mat->cmap->N; j++) {
1875         if (tmp2[j] > *norm) *norm = tmp2[j];
1876       }
1877       PetscCall(PetscFree(tmp));
1878       PetscCall(PetscFree(tmp2));
1879       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1880     } else if (type == NORM_INFINITY) { /* max row norm */
1881       PetscReal ntemp = 0.0;
1882       for (j = 0; j < aij->A->rmap->n; j++) {
1883         v   = amata + amat->i[j];
1884         sum = 0.0;
1885         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1886           sum += PetscAbsScalar(*v);
1887           v++;
1888         }
1889         v = bmata + bmat->i[j];
1890         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v);
1892           v++;
1893         }
1894         if (sum > ntemp) ntemp = sum;
1895       }
1896       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1897       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1898     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1899     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1900     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1906 {
1907   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1908   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1909   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1910   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1911   Mat              B, A_diag, *B_diag;
1912   const MatScalar *pbv, *bv;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1916   ma = A->rmap->n;
1917   na = A->cmap->n;
1918   mb = a->B->rmap->n;
1919   nb = a->B->cmap->n;
1920   ai = Aloc->i;
1921   aj = Aloc->j;
1922   bi = Bloc->i;
1923   bj = Bloc->j;
1924   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1925     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1926     PetscSFNode         *oloc;
1927     PETSC_UNUSED PetscSF sf;
1928 
1929     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1930     /* compute d_nnz for preallocation */
1931     PetscCall(PetscArrayzero(d_nnz, na));
1932     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1933     /* compute local off-diagonal contributions */
1934     PetscCall(PetscArrayzero(g_nnz, nb));
1935     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1936     /* map those to global */
1937     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1938     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1939     PetscCall(PetscSFSetFromOptions(sf));
1940     PetscCall(PetscArrayzero(o_nnz, na));
1941     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1942     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1943     PetscCall(PetscSFDestroy(&sf));
1944 
1945     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1946     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1947     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1948     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1949     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1950     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1951   } else {
1952     B = *matout;
1953     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1954   }
1955 
1956   b           = (Mat_MPIAIJ *)B->data;
1957   A_diag      = a->A;
1958   B_diag      = &b->A;
1959   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1960   A_diag_ncol = A_diag->cmap->N;
1961   B_diag_ilen = sub_B_diag->ilen;
1962   B_diag_i    = sub_B_diag->i;
1963 
1964   /* Set ilen for diagonal of B */
1965   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1966 
1967   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1968   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1969   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1970   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1971 
1972   /* copy over the B part */
1973   PetscCall(PetscMalloc1(bi[mb], &cols));
1974   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1975   pbv = bv;
1976   row = A->rmap->rstart;
1977   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1978   cols_tmp = cols;
1979   for (i = 0; i < mb; i++) {
1980     ncol = bi[i + 1] - bi[i];
1981     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1982     row++;
1983     pbv += ncol;
1984     cols_tmp += ncol;
1985   }
1986   PetscCall(PetscFree(cols));
1987   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1988 
1989   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1990   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1991   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1992     *matout = B;
1993   } else {
1994     PetscCall(MatHeaderMerge(A, &B));
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2002   Mat         a = aij->A, b = aij->B;
2003   PetscInt    s1, s2, s3;
2004 
2005   PetscFunctionBegin;
2006   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2007   if (rr) {
2008     PetscCall(VecGetLocalSize(rr, &s1));
2009     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2010     /* Overlap communication with computation. */
2011     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2012   }
2013   if (ll) {
2014     PetscCall(VecGetLocalSize(ll, &s1));
2015     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2016     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2017   }
2018   /* scale  the diagonal block */
2019   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2020 
2021   if (rr) {
2022     /* Do a scatter end and then right scale the off-diagonal block */
2023     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2024     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2025   }
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2030 {
2031   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2032 
2033   PetscFunctionBegin;
2034   PetscCall(MatSetUnfactored(a->A));
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2039 {
2040   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2041   Mat         a, b, c, d;
2042   PetscBool   flg;
2043 
2044   PetscFunctionBegin;
2045   a = matA->A;
2046   b = matA->B;
2047   c = matB->A;
2048   d = matB->B;
2049 
2050   PetscCall(MatEqual(a, c, &flg));
2051   if (flg) PetscCall(MatEqual(b, d, &flg));
2052   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2057 {
2058   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2059   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2060 
2061   PetscFunctionBegin;
2062   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2063   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2064     /* because of the column compression in the off-processor part of the matrix a->B,
2065        the number of columns in a->B and b->B may be different, hence we cannot call
2066        MatCopy() directly on the two parts. If need be, we can provide a more
2067        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2068        then copying the submatrices */
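    /* e.g., A's off-diagonal block may store its compressed (nonempty) columns as {1,3} while B's stores
       {1,2,3}; the two blocks then have different local column counts, so their compressed arrays cannot
       be copied entry by entry (the column sets here are illustrative only) */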
2069     PetscCall(MatCopy_Basic(A, B, str));
2070   } else {
2071     PetscCall(MatCopy(a->A, b->A, str));
2072     PetscCall(MatCopy(a->B, b->B, str));
2073   }
2074   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2079 {
2080   PetscFunctionBegin;
2081   PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 /*
2086    Computes the number of nonzeros per row needed for preallocation when X and Y
2087    have different nonzero structure.
2088 */
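/*
   A small worked example (illustrative values only): if row i of X has global columns {0, 3, 7} and
   row i of Y has global columns {3, 5}, the merged pattern is {0, 3, 5, 7}, so nnz[i] = 4; the shared
   column 3 is counted only once by the duplicate-skip test below.
*/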
2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2090 {
2091   PetscInt i, j, k, nzx, nzy;
2092 
2093   PetscFunctionBegin;
2094   /* Set the number of nonzeros in the new matrix */
2095   for (i = 0; i < m; i++) {
2096     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2097     nzx    = xi[i + 1] - xi[i];
2098     nzy    = yi[i + 1] - yi[i];
2099     nnz[i] = 0;
2100     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2101       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2102       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2103       nnz[i]++;
2104     }
2105     for (; k < nzy; k++) nnz[i]++;
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2112 {
2113   PetscInt    m = Y->rmap->N;
2114   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2115   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2116 
2117   PetscFunctionBegin;
2118   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2123 {
2124   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2125 
2126   PetscFunctionBegin;
2127   if (str == SAME_NONZERO_PATTERN) {
2128     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2129     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2130   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2131     PetscCall(MatAXPY_Basic(Y, a, X, str));
2132   } else {
2133     Mat       B;
2134     PetscInt *nnz_d, *nnz_o;
2135 
2136     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2137     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2138     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2139     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2140     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2141     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2142     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2143     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2144     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2145     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2146     PetscCall(MatHeaderMerge(Y, &B));
2147     PetscCall(PetscFree(nnz_d));
2148     PetscCall(PetscFree(nnz_o));
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2156 {
2157   PetscFunctionBegin;
2158   if (PetscDefined(USE_COMPLEX)) {
2159     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2160 
2161     PetscCall(MatConjugate_SeqAIJ(aij->A));
2162     PetscCall(MatConjugate_SeqAIJ(aij->B));
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatRealPart(a->A));
2173   PetscCall(MatRealPart(a->B));
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2180 
2181   PetscFunctionBegin;
2182   PetscCall(MatImaginaryPart(a->A));
2183   PetscCall(MatImaginaryPart(a->B));
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2188 {
2189   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2190   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2191   PetscScalar       *va, *vv;
2192   Vec                vB, vA;
2193   const PetscScalar *vb;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2197   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2198 
2199   PetscCall(VecGetArrayWrite(vA, &va));
2200   if (idx) {
2201     for (i = 0; i < m; i++) {
2202       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2203     }
2204   }
2205 
2206   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2207   PetscCall(PetscMalloc1(m, &idxb));
2208   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2209 
2210   PetscCall(VecGetArrayWrite(v, &vv));
2211   PetscCall(VecGetArrayRead(vB, &vb));
2212   for (i = 0; i < m; i++) {
2213     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2214       vv[i] = vb[i];
2215       if (idx) idx[i] = a->garray[idxb[i]];
2216     } else {
2217       vv[i] = va[i];
2218       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2219     }
2220   }
2221   PetscCall(VecRestoreArrayWrite(v, &vv));
2222   PetscCall(VecRestoreArrayWrite(vA, &va));
2223   PetscCall(VecRestoreArrayRead(vB, &vb));
2224   PetscCall(PetscFree(idxb));
2225   PetscCall(VecDestroy(&vA));
2226   PetscCall(VecDestroy(&vB));
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2231 {
2232   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2233   PetscInt           m = A->rmap->n, n = A->cmap->n;
2234   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2235   PetscInt          *cmap = mat->garray;
2236   PetscInt          *diagIdx, *offdiagIdx;
2237   Vec                diagV, offdiagV;
2238   PetscScalar       *a, *diagA, *offdiagA;
2239   const PetscScalar *ba, *bav;
2240   PetscInt           r, j, col, ncols, *bi, *bj;
2241   Mat                B = mat->B;
2242   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2243 
2244   PetscFunctionBegin;
2245   /* When one process holds the entire matrix A and the other processes have no entries */
2246   if (A->cmap->N == n) {
2247     PetscCall(VecGetArrayWrite(v, &diagA));
2248     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2249     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2250     PetscCall(VecDestroy(&diagV));
2251     PetscCall(VecRestoreArrayWrite(v, &diagA));
2252     PetscFunctionReturn(0);
2253   } else if (n == 0) {
2254     if (m) {
2255       PetscCall(VecGetArrayWrite(v, &a));
2256       for (r = 0; r < m; r++) {
2257         a[r] = 0.0;
2258         if (idx) idx[r] = -1;
2259       }
2260       PetscCall(VecRestoreArrayWrite(v, &a));
2261     }
2262     PetscFunctionReturn(0);
2263   }
2264 
2265   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2266   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2267   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2268   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2269 
2270   /* Get offdiagIdx[] for implicit 0.0 */
2271   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2272   ba = bav;
2273   bi = b->i;
2274   bj = b->j;
2275   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2276   for (r = 0; r < m; r++) {
2277     ncols = bi[r + 1] - bi[r];
2278     if (ncols == A->cmap->N - n) { /* Brow is dense */
2279       offdiagA[r]   = *ba;
2280       offdiagIdx[r] = cmap[0];
2281     } else { /* Brow is sparse, so we already KNOW the minimum absolute value is 0.0 (the row has an implicit zero) */
2282       offdiagA[r] = 0.0;
2283 
2284       /* Find first hole in the cmap */
2285       for (j = 0; j < ncols; j++) {
2286         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2287         if (col > j && j < cstart) {
2288           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2289           break;
2290         } else if (col > j + n && j >= cstart) {
2291           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2292           break;
2293         }
2294       }
2295       if (j == ncols && ncols < A->cmap->N - n) {
2296         /* a hole is outside compressed Bcols */
2297         if (ncols == 0) {
2298           if (cstart) {
2299             offdiagIdx[r] = 0;
2300           } else offdiagIdx[r] = cend;
2301         } else { /* ncols > 0 */
2302           offdiagIdx[r] = cmap[ncols - 1] + 1;
2303           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2304         }
2305       }
2306     }
2307 
2308     for (j = 0; j < ncols; j++) {
2309       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2310         offdiagA[r]   = *ba;
2311         offdiagIdx[r] = cmap[*bj];
2312       }
2313       ba++;
2314       bj++;
2315     }
2316   }
2317 
2318   PetscCall(VecGetArrayWrite(v, &a));
2319   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2320   for (r = 0; r < m; ++r) {
2321     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2322       a[r] = diagA[r];
2323       if (idx) idx[r] = cstart + diagIdx[r];
2324     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2325       a[r] = diagA[r];
2326       if (idx) {
2327         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2328           idx[r] = cstart + diagIdx[r];
2329         } else idx[r] = offdiagIdx[r];
2330       }
2331     } else {
2332       a[r] = offdiagA[r];
2333       if (idx) idx[r] = offdiagIdx[r];
2334     }
2335   }
2336   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2337   PetscCall(VecRestoreArrayWrite(v, &a));
2338   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2339   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2340   PetscCall(VecDestroy(&diagV));
2341   PetscCall(VecDestroy(&offdiagV));
2342   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2347 {
2348   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2349   PetscInt           m = A->rmap->n, n = A->cmap->n;
2350   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2351   PetscInt          *cmap = mat->garray;
2352   PetscInt          *diagIdx, *offdiagIdx;
2353   Vec                diagV, offdiagV;
2354   PetscScalar       *a, *diagA, *offdiagA;
2355   const PetscScalar *ba, *bav;
2356   PetscInt           r, j, col, ncols, *bi, *bj;
2357   Mat                B = mat->B;
2358   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2359 
2360   PetscFunctionBegin;
2361   /* When one process holds the entire matrix A and the other processes have no entries */
2362   if (A->cmap->N == n) {
2363     PetscCall(VecGetArrayWrite(v, &diagA));
2364     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2365     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2366     PetscCall(VecDestroy(&diagV));
2367     PetscCall(VecRestoreArrayWrite(v, &diagA));
2368     PetscFunctionReturn(0);
2369   } else if (n == 0) {
2370     if (m) {
2371       PetscCall(VecGetArrayWrite(v, &a));
2372       for (r = 0; r < m; r++) {
2373         a[r] = PETSC_MAX_REAL;
2374         if (idx) idx[r] = -1;
2375       }
2376       PetscCall(VecRestoreArrayWrite(v, &a));
2377     }
2378     PetscFunctionReturn(0);
2379   }
2380 
2381   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2382   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2383   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2384   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2385 
2386   /* Get offdiagIdx[] for implicit 0.0 */
2387   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2388   ba = bav;
2389   bi = b->i;
2390   bj = b->j;
2391   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2392   for (r = 0; r < m; r++) {
2393     ncols = bi[r + 1] - bi[r];
2394     if (ncols == A->cmap->N - n) { /* Brow is dense */
2395       offdiagA[r]   = *ba;
2396       offdiagIdx[r] = cmap[0];
2397     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (the row has an implicit zero) */
2398       offdiagA[r] = 0.0;
2399 
2400       /* Find first hole in the cmap */
2401       for (j = 0; j < ncols; j++) {
2402         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2403         if (col > j && j < cstart) {
2404           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2405           break;
2406         } else if (col > j + n && j >= cstart) {
2407           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2408           break;
2409         }
2410       }
2411       if (j == ncols && ncols < A->cmap->N - n) {
2412         /* a hole is outside compressed Bcols */
2413         if (ncols == 0) {
2414           if (cstart) {
2415             offdiagIdx[r] = 0;
2416           } else offdiagIdx[r] = cend;
2417         } else { /* ncols > 0 */
2418           offdiagIdx[r] = cmap[ncols - 1] + 1;
2419           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2420         }
2421       }
2422     }
2423 
2424     for (j = 0; j < ncols; j++) {
2425       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2426         offdiagA[r]   = *ba;
2427         offdiagIdx[r] = cmap[*bj];
2428       }
2429       ba++;
2430       bj++;
2431     }
2432   }
2433 
2434   PetscCall(VecGetArrayWrite(v, &a));
2435   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2436   for (r = 0; r < m; ++r) {
2437     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2438       a[r] = diagA[r];
2439       if (idx) idx[r] = cstart + diagIdx[r];
2440     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2441       a[r] = diagA[r];
2442       if (idx) {
2443         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2444           idx[r] = cstart + diagIdx[r];
2445         } else idx[r] = offdiagIdx[r];
2446       }
2447     } else {
2448       a[r] = offdiagA[r];
2449       if (idx) idx[r] = offdiagIdx[r];
2450     }
2451   }
2452   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2453   PetscCall(VecRestoreArrayWrite(v, &a));
2454   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2455   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2456   PetscCall(VecDestroy(&diagV));
2457   PetscCall(VecDestroy(&offdiagV));
2458   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2463 {
2464   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2465   PetscInt           m = A->rmap->n, n = A->cmap->n;
2466   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2467   PetscInt          *cmap = mat->garray;
2468   PetscInt          *diagIdx, *offdiagIdx;
2469   Vec                diagV, offdiagV;
2470   PetscScalar       *a, *diagA, *offdiagA;
2471   const PetscScalar *ba, *bav;
2472   PetscInt           r, j, col, ncols, *bi, *bj;
2473   Mat                B = mat->B;
2474   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2475 
2476   PetscFunctionBegin;
2477   /* When one process holds the entire matrix A and the other processes have no entries */
2478   if (A->cmap->N == n) {
2479     PetscCall(VecGetArrayWrite(v, &diagA));
2480     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2481     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2482     PetscCall(VecDestroy(&diagV));
2483     PetscCall(VecRestoreArrayWrite(v, &diagA));
2484     PetscFunctionReturn(0);
2485   } else if (n == 0) {
2486     if (m) {
2487       PetscCall(VecGetArrayWrite(v, &a));
2488       for (r = 0; r < m; r++) {
2489         a[r] = PETSC_MIN_REAL;
2490         if (idx) idx[r] = -1;
2491       }
2492       PetscCall(VecRestoreArrayWrite(v, &a));
2493     }
2494     PetscFunctionReturn(0);
2495   }
2496 
2497   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2498   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2499   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2500   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2501 
2502   /* Get offdiagIdx[] for implicit 0.0 */
2503   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2504   ba = bav;
2505   bi = b->i;
2506   bj = b->j;
2507   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2508   for (r = 0; r < m; r++) {
2509     ncols = bi[r + 1] - bi[r];
2510     if (ncols == A->cmap->N - n) { /* Brow is dense */
2511       offdiagA[r]   = *ba;
2512       offdiagIdx[r] = cmap[0];
2513     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher (the row has an implicit zero) */
2514       offdiagA[r] = 0.0;
2515 
2516       /* Find first hole in the cmap */
2517       for (j = 0; j < ncols; j++) {
2518         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2519         if (col > j && j < cstart) {
2520           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2521           break;
2522         } else if (col > j + n && j >= cstart) {
2523           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2524           break;
2525         }
2526       }
2527       if (j == ncols && ncols < A->cmap->N - n) {
2528         /* a hole is outside compressed Bcols */
2529         if (ncols == 0) {
2530           if (cstart) {
2531             offdiagIdx[r] = 0;
2532           } else offdiagIdx[r] = cend;
2533         } else { /* ncols > 0 */
2534           offdiagIdx[r] = cmap[ncols - 1] + 1;
2535           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2536         }
2537       }
2538     }
2539 
2540     for (j = 0; j < ncols; j++) {
2541       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2542         offdiagA[r]   = *ba;
2543         offdiagIdx[r] = cmap[*bj];
2544       }
2545       ba++;
2546       bj++;
2547     }
2548   }
2549 
2550   PetscCall(VecGetArrayWrite(v, &a));
2551   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2552   for (r = 0; r < m; ++r) {
2553     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2554       a[r] = diagA[r];
2555       if (idx) idx[r] = cstart + diagIdx[r];
2556     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2557       a[r] = diagA[r];
2558       if (idx) {
2559         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2560           idx[r] = cstart + diagIdx[r];
2561         } else idx[r] = offdiagIdx[r];
2562       }
2563     } else {
2564       a[r] = offdiagA[r];
2565       if (idx) idx[r] = offdiagIdx[r];
2566     }
2567   }
2568   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2569   PetscCall(VecRestoreArrayWrite(v, &a));
2570   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2571   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2572   PetscCall(VecDestroy(&diagV));
2573   PetscCall(VecDestroy(&offdiagV));
2574   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600 {
2601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2602 
2603   PetscFunctionBegin;
2604   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2605   PetscCall(MatSetRandom(aij->A, rctx));
2606   if (x->assembled) {
2607     PetscCall(MatSetRandom(aij->B, rctx));
2608   } else {
2609     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610   }
2611   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2612   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 /*@
2625    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626 
2627    Not collective
2628 
2629    Input Parameter:
2630 .    A - the matrix
2631 
2632    Output Parameter:
2633 .    nz - the number of nonzeros
2634 
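   Example Usage:
   A minimal sketch (assumes `A` is an assembled `MATMPIAIJ` matrix):
.vb
   PetscCount nz;
   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
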
2635  Level: advanced
2636 
2637 .seealso: `MATMPIAIJ`, `Mat`
2638 @*/
2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640 {
2641   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643 
2644   PetscFunctionBegin;
2645   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2651 
2652    Collective
2653 
2654    Input Parameters:
2655 +    A - the matrix
-    sc - `PETSC_TRUE` to use the scalable algorithm (the default is the non-scalable algorithm)
2657 
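   Example Usage:
   A minimal sketch, assuming `A` is a `MATMPIAIJ` matrix and `n`, `is`, and `ov` are the arguments later passed to `MatIncreaseOverlap()`:
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
  PetscCall(MatIncreaseOverlap(A, n, is, ov));
.ve
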
2658  Level: advanced
2659 
2660 @*/
2661 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2662 {
2663   PetscFunctionBegin;
2664   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2669 {
2670   PetscBool sc = PETSC_FALSE, flg;
2671 
2672   PetscFunctionBegin;
2673   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2674   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2675   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2676   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2677   PetscOptionsHeadEnd();
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2682 {
2683   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2684   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2685 
2686   PetscFunctionBegin;
2687   if (!Y->preallocated) {
2688     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if the diagonal entries of Y only partially lie in maij->A; we just need an estimated preallocation */
2690     PetscInt nonew = aij->nonew;
2691     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2692     aij->nonew = nonew;
2693   }
2694   PetscCall(MatShift_Basic(Y, a));
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2704   PetscCall(MatMissingDiagonal(a->A, missing, d));
2705   if (d) {
2706     PetscInt rstart;
2707     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2708     *d += rstart;
2709   }
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2723 {
2724   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2725 
2726   PetscFunctionBegin;
2727   PetscCall(MatEliminateZeros(a->A));
2728   PetscCall(MatEliminateZeros(a->B));
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 /* -------------------------------------------------------------------*/
2733 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2734                                        MatGetRow_MPIAIJ,
2735                                        MatRestoreRow_MPIAIJ,
2736                                        MatMult_MPIAIJ,
2737                                        /* 4*/ MatMultAdd_MPIAIJ,
2738                                        MatMultTranspose_MPIAIJ,
2739                                        MatMultTransposeAdd_MPIAIJ,
2740                                        NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        /*10*/ NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        MatSOR_MPIAIJ,
2747                                        MatTranspose_MPIAIJ,
2748                                        /*15*/ MatGetInfo_MPIAIJ,
2749                                        MatEqual_MPIAIJ,
2750                                        MatGetDiagonal_MPIAIJ,
2751                                        MatDiagonalScale_MPIAIJ,
2752                                        MatNorm_MPIAIJ,
2753                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2754                                        MatAssemblyEnd_MPIAIJ,
2755                                        MatSetOption_MPIAIJ,
2756                                        MatZeroEntries_MPIAIJ,
2757                                        /*24*/ MatZeroRows_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        /*29*/ MatSetUp_MPIAIJ,
2763                                        NULL,
2764                                        NULL,
2765                                        MatGetDiagonalBlock_MPIAIJ,
2766                                        NULL,
2767                                        /*34*/ MatDuplicate_MPIAIJ,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                        /*39*/ MatAXPY_MPIAIJ,
2773                                        MatCreateSubMatrices_MPIAIJ,
2774                                        MatIncreaseOverlap_MPIAIJ,
2775                                        MatGetValues_MPIAIJ,
2776                                        MatCopy_MPIAIJ,
2777                                        /*44*/ MatGetRowMax_MPIAIJ,
2778                                        MatScale_MPIAIJ,
2779                                        MatShift_MPIAIJ,
2780                                        MatDiagonalSet_MPIAIJ,
2781                                        MatZeroRowsColumns_MPIAIJ,
2782                                        /*49*/ MatSetRandom_MPIAIJ,
2783                                        MatGetRowIJ_MPIAIJ,
2784                                        MatRestoreRowIJ_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2788                                        NULL,
2789                                        MatSetUnfactored_MPIAIJ,
2790                                        MatPermute_MPIAIJ,
2791                                        NULL,
2792                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2793                                        MatDestroy_MPIAIJ,
2794                                        MatView_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        /*64*/ NULL,
2798                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2803                                        MatGetRowMinAbs_MPIAIJ,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        NULL,
2808                                        /*75*/ MatFDColoringApply_AIJ,
2809                                        MatSetFromOptions_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        MatFindZeroDiagonals_MPIAIJ,
2813                                        /*80*/ NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        /*83*/ MatLoad_MPIAIJ,
2817                                        MatIsSymmetric_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        /*89*/ NULL,
2823                                        NULL,
2824                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2828                                        NULL,
2829                                        NULL,
2830                                        NULL,
2831                                        MatBindToCPU_MPIAIJ,
2832                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        MatConjugate_MPIAIJ,
2836                                        NULL,
2837                                        /*104*/ MatSetValuesRow_MPIAIJ,
2838                                        MatRealPart_MPIAIJ,
2839                                        MatImaginaryPart_MPIAIJ,
2840                                        NULL,
2841                                        NULL,
2842                                        /*109*/ NULL,
2843                                        NULL,
2844                                        MatGetRowMin_MPIAIJ,
2845                                        NULL,
2846                                        MatMissingDiagonal_MPIAIJ,
2847                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2848                                        NULL,
2849                                        MatGetGhosts_MPIAIJ,
2850                                        NULL,
2851                                        NULL,
2852                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatGetMultiProcBlock_MPIAIJ,
2857                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2858                                        MatGetColumnReductions_MPIAIJ,
2859                                        MatInvertBlockDiagonal_MPIAIJ,
2860                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2861                                        MatCreateSubMatricesMPI_MPIAIJ,
2862                                        /*129*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2866                                        NULL,
2867                                        /*134*/ NULL,
2868                                        NULL,
2869                                        NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2873                                        NULL,
2874                                        NULL,
2875                                        MatFDColoringSetUp_MPIXAIJ,
2876                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2877                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2878                                        /*145*/ NULL,
2879                                        NULL,
2880                                        NULL,
2881                                        MatCreateGraph_Simple_AIJ,
2882                                        NULL,
2883                                        /*150*/ NULL,
2884                                        MatEliminateZeros_MPIAIJ};
2885 
2886 /* ----------------------------------------------------------------------------------------*/
2887 
2888 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatStoreValues(aij->A));
2894   PetscCall(MatStoreValues(aij->B));
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2899 {
2900   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2901 
2902   PetscFunctionBegin;
2903   PetscCall(MatRetrieveValues(aij->A));
2904   PetscCall(MatRetrieveValues(aij->B));
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2909 {
2910   Mat_MPIAIJ *b;
2911   PetscMPIInt size;
2912 
2913   PetscFunctionBegin;
2914   PetscCall(PetscLayoutSetUp(B->rmap));
2915   PetscCall(PetscLayoutSetUp(B->cmap));
2916   b = (Mat_MPIAIJ *)B->data;
2917 
2918 #if defined(PETSC_USE_CTABLE)
2919   PetscCall(PetscHMapIDestroy(&b->colmap));
2920 #else
2921   PetscCall(PetscFree(b->colmap));
2922 #endif
2923   PetscCall(PetscFree(b->garray));
2924   PetscCall(VecDestroy(&b->lvec));
2925   PetscCall(VecScatterDestroy(&b->Mvctx));
2926 
  /* Because B may have been resized, we simply destroy it and create a new one each time */
2928   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2929   PetscCall(MatDestroy(&b->B));
2930   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2931   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2932   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2933   PetscCall(MatSetType(b->B, MATSEQAIJ));
2934 
2935   if (!B->preallocated) {
2936     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938     PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939     PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   }
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b;
2953 
2954   PetscFunctionBegin;
2955   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2956   PetscCall(PetscLayoutSetUp(B->rmap));
2957   PetscCall(PetscLayoutSetUp(B->cmap));
2958   b = (Mat_MPIAIJ *)B->data;
2959 
2960 #if defined(PETSC_USE_CTABLE)
2961   PetscCall(PetscHMapIDestroy(&b->colmap));
2962 #else
2963   PetscCall(PetscFree(b->colmap));
2964 #endif
2965   PetscCall(PetscFree(b->garray));
2966   PetscCall(VecDestroy(&b->lvec));
2967   PetscCall(VecScatterDestroy(&b->Mvctx));
2968 
2969   PetscCall(MatResetPreallocation(b->A));
2970   PetscCall(MatResetPreallocation(b->B));
2971   B->preallocated  = PETSC_TRUE;
2972   B->was_assembled = PETSC_FALSE;
2973   B->assembled     = PETSC_FALSE;
2974   PetscFunctionReturn(0);
2975 }
2976 
2977 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2978 {
2979   Mat         mat;
2980   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2981 
2982   PetscFunctionBegin;
2983   *newmat = NULL;
2984   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2985   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2986   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2987   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2988   a = (Mat_MPIAIJ *)mat->data;
2989 
2990   mat->factortype   = matin->factortype;
2991   mat->assembled    = matin->assembled;
2992   mat->insertmode   = NOT_SET_VALUES;
2993   mat->preallocated = matin->preallocated;
2994 
2995   a->size         = oldmat->size;
2996   a->rank         = oldmat->rank;
2997   a->donotstash   = oldmat->donotstash;
2998   a->roworiented  = oldmat->roworiented;
2999   a->rowindices   = NULL;
3000   a->rowvalues    = NULL;
3001   a->getrowactive = PETSC_FALSE;
3002 
3003   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3004   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3005 
3006   if (oldmat->colmap) {
3007 #if defined(PETSC_USE_CTABLE)
3008     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3009 #else
3010     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3011     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3012 #endif
3013   } else a->colmap = NULL;
3014   if (oldmat->garray) {
3015     PetscInt len;
3016     len = oldmat->B->cmap->n;
3017     PetscCall(PetscMalloc1(len + 1, &a->garray));
3018     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3019   } else a->garray = NULL;
3020 
  /* MatDuplicate() may be called on a non-assembled matrix, since it only requires
     the matrix to be preallocated; this can happen, for example, inside a DMCreateMatrix_Shell */
3024   if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
3025   if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
3026   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3027   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3028   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3029   *newmat = mat;
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3034 {
3035   PetscBool isbinary, ishdf5;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3039   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3040   /* force binary viewer to load .info file if it has not yet done so */
3041   PetscCall(PetscViewerSetUp(viewer));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3043   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3044   if (isbinary) {
3045     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3046   } else if (ishdf5) {
3047 #if defined(PETSC_HAVE_HDF5)
3048     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3049 #else
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3051 #endif
3052   } else {
3053     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3054   }
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3059 {
3060   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3061   PetscInt    *rowidxs, *colidxs;
3062   PetscScalar *matvals;
3063 
3064   PetscFunctionBegin;
3065   PetscCall(PetscViewerSetUp(viewer));
3066 
3067   /* read in matrix header */
3068   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3069   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3070   M  = header[1];
3071   N  = header[2];
3072   nz = header[3];
3073   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3074   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3075   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3076 
3077   /* set block sizes from the viewer's .info file */
3078   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3079   /* set global sizes if not set already */
3080   if (mat->rmap->N < 0) mat->rmap->N = M;
3081   if (mat->cmap->N < 0) mat->cmap->N = N;
3082   PetscCall(PetscLayoutSetUp(mat->rmap));
3083   PetscCall(PetscLayoutSetUp(mat->cmap));
3084 
3085   /* check if the matrix sizes are correct */
3086   PetscCall(MatGetSize(mat, &rows, &cols));
3087   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3088 
3089   /* read in row lengths and build row indices */
3090   PetscCall(MatGetLocalSize(mat, &m, NULL));
3091   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3092   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3093   rowidxs[0] = 0;
3094   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3095   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3096   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3097   /* read in column indices and matrix values */
3098   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3100   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3101   /* store matrix indices and values */
3102   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3103   PetscCall(PetscFree(rowidxs));
3104   PetscCall(PetscFree2(colidxs, matvals));
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3110 {
3111   IS          iscol_local;
3112   PetscBool   isstride;
3113   PetscMPIInt lisstride = 0, gisstride;
3114 
3115   PetscFunctionBegin;
  /* check if we are grabbing all columns */
3117   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3118 
3119   if (isstride) {
3120     PetscInt start, len, mstart, mlen;
3121     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3122     PetscCall(ISGetLocalSize(iscol, &len));
3123     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3124     if (mstart == start && mlen - mstart == len) lisstride = 1;
3125   }
3126 
3127   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3128   if (gisstride) {
3129     PetscInt N;
3130     PetscCall(MatGetSize(mat, NULL, &N));
3131     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3132     PetscCall(ISSetIdentity(iscol_local));
3133     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3134   } else {
3135     PetscInt cbs;
3136     PetscCall(ISGetBlockSize(iscol, &cbs));
3137     PetscCall(ISAllGather(iscol, &iscol_local));
3138     PetscCall(ISSetBlockSize(iscol_local, cbs));
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
3156    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3157    iscol_o - sequential column index set for retrieving mat->B
3158    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3159  */
3160 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3161 {
3162   Vec             x, cmap;
3163   const PetscInt *is_idx;
3164   PetscScalar    *xarray, *cmaparray;
3165   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3166   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3167   Mat             B    = a->B;
3168   Vec             lvec = a->lvec, lcmap;
3169   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3170   MPI_Comm        comm;
3171   VecScatter      Mvctx = a->Mvctx;
3172 
3173   PetscFunctionBegin;
3174   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3175   PetscCall(ISGetLocalSize(iscol, &ncols));
3176 
3177   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3178   PetscCall(MatCreateVecs(mat, &x, NULL));
3179   PetscCall(VecSet(x, -1.0));
3180   PetscCall(VecDuplicate(x, &cmap));
3181   PetscCall(VecSet(cmap, -1.0));
3182 
3183   /* Get start indices */
3184   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3185   isstart -= ncols;
3186   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3187 
3188   PetscCall(ISGetIndices(iscol, &is_idx));
3189   PetscCall(VecGetArray(x, &xarray));
3190   PetscCall(VecGetArray(cmap, &cmaparray));
3191   PetscCall(PetscMalloc1(ncols, &idx));
3192   for (i = 0; i < ncols; i++) {
3193     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3194     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3195     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3196   }
3197   PetscCall(VecRestoreArray(x, &xarray));
3198   PetscCall(VecRestoreArray(cmap, &cmaparray));
3199   PetscCall(ISRestoreIndices(iscol, &is_idx));
3200 
3201   /* Get iscol_d */
3202   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3203   PetscCall(ISGetBlockSize(iscol, &i));
3204   PetscCall(ISSetBlockSize(*iscol_d, i));
3205 
3206   /* Get isrow_d */
3207   PetscCall(ISGetLocalSize(isrow, &m));
3208   rstart = mat->rmap->rstart;
3209   PetscCall(PetscMalloc1(m, &idx));
3210   PetscCall(ISGetIndices(isrow, &is_idx));
3211   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3212   PetscCall(ISRestoreIndices(isrow, &is_idx));
3213 
3214   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3215   PetscCall(ISGetBlockSize(isrow, &i));
3216   PetscCall(ISSetBlockSize(*isrow_d, i));
3217 
3218   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3219   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3220   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3221 
3222   PetscCall(VecDuplicate(lvec, &lcmap));
3223 
3224   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3225   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3226 
3227   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3228   /* off-process column indices */
3229   count = 0;
3230   PetscCall(PetscMalloc1(Bn, &idx));
3231   PetscCall(PetscMalloc1(Bn, &cmap1));
3232 
3233   PetscCall(VecGetArray(lvec, &xarray));
3234   PetscCall(VecGetArray(lcmap, &cmaparray));
3235   for (i = 0; i < Bn; i++) {
3236     if (PetscRealPart(xarray[i]) > -1.0) {
3237       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3238       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3239       count++;
3240     }
3241   }
3242   PetscCall(VecRestoreArray(lvec, &xarray));
3243   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3244 
3245   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3246   /* cannot ensure iscol_o has same blocksize as iscol! */
3247 
3248   PetscCall(PetscFree(idx));
3249   *garray = cmap1;
3250 
3251   PetscCall(VecDestroy(&x));
3252   PetscCall(VecDestroy(&cmap));
3253   PetscCall(VecDestroy(&lcmap));
3254   PetscFunctionReturn(0);
3255 }
3256 
3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3259 {
3260   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3261   Mat         M = NULL;
3262   MPI_Comm    comm;
3263   IS          iscol_d, isrow_d, iscol_o;
3264   Mat         Asub = NULL, Bsub = NULL;
3265   PetscInt    n;
3266 
3267   PetscFunctionBegin;
3268   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3269 
3270   if (call == MAT_REUSE_MATRIX) {
3271     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3272     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3273     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3274 
3275     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3276     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3277 
3278     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3279     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3280 
3281     /* Update diagonal and off-diagonal portions of submat */
3282     asub = (Mat_MPIAIJ *)(*submat)->data;
3283     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3284     PetscCall(ISGetLocalSize(iscol_o, &n));
3285     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3286     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3287     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
  } else { /* call == MAT_INITIAL_MATRIX */
3289   } else { /* call == MAT_INITIAL_MATRIX) */
3290     const PetscInt *garray;
3291     PetscInt        BsubN;
3292 
3293     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3294     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3295 
3296     /* Create local submatrices Asub and Bsub */
3297     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3298     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3299 
3300     /* Create submatrix M */
3301     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3302 
3303     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3304     asub = (Mat_MPIAIJ *)M->data;
3305 
3306     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3307     n = asub->B->cmap->N;
3308     if (BsubN > n) {
3309       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3310       const PetscInt *idx;
3311       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3312       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3313 
3314       PetscCall(PetscMalloc1(n, &idx_new));
3315       j = 0;
3316       PetscCall(ISGetIndices(iscol_o, &idx));
3317       for (i = 0; i < n; i++) {
3318         if (j >= BsubN) break;
3319         while (subgarray[i] > garray[j]) j++;
3320 
3321         if (subgarray[i] == garray[j]) {
3322           idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3324       }
3325       PetscCall(ISRestoreIndices(iscol_o, &idx));
3326 
3327       PetscCall(ISDestroy(&iscol_o));
3328       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3329 
3330     } else if (BsubN < n) {
3331       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3332     }
3333 
3334     PetscCall(PetscFree(garray));
3335     *submat = M;
3336 
3337     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3338     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3339     PetscCall(ISDestroy(&isrow_d));
3340 
3341     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3342     PetscCall(ISDestroy(&iscol_d));
3343 
3344     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3345     PetscCall(ISDestroy(&iscol_o));
3346   }
3347   PetscFunctionReturn(0);
3348 }
3349 
3350 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3351 {
3352   IS        iscol_local = NULL, isrow_d;
3353   PetscInt  csize;
3354   PetscInt  n, i, j, start, end;
3355   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3356   MPI_Comm  comm;
3357 
3358   PetscFunctionBegin;
3359   /* If isrow has same processor distribution as mat,
3360      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3361   if (call == MAT_REUSE_MATRIX) {
3362     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3363     if (isrow_d) {
3364       sameRowDist  = PETSC_TRUE;
3365       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3366     } else {
3367       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3368       if (iscol_local) {
3369         sameRowDist  = PETSC_TRUE;
3370         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3371       }
3372     }
3373   } else {
3374     /* Check if isrow has same processor distribution as mat */
3375     sameDist[0] = PETSC_FALSE;
3376     PetscCall(ISGetLocalSize(isrow, &n));
3377     if (!n) {
3378       sameDist[0] = PETSC_TRUE;
3379     } else {
3380       PetscCall(ISGetMinMax(isrow, &i, &j));
3381       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3382       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3383     }
3384 
3385     /* Check if iscol has same processor distribution as mat */
3386     sameDist[1] = PETSC_FALSE;
3387     PetscCall(ISGetLocalSize(iscol, &n));
3388     if (!n) {
3389       sameDist[1] = PETSC_TRUE;
3390     } else {
3391       PetscCall(ISGetMinMax(iscol, &i, &j));
3392       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3393       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3394     }
3395 
3396     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3397     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3398     sameRowDist = tsameDist[0];
3399   }
3400 
3401   if (sameRowDist) {
3402     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3403       /* isrow and iscol have same processor distribution as mat */
3404       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3405       PetscFunctionReturn(0);
3406     } else { /* sameRowDist */
3407       /* isrow has same processor distribution as mat */
3408       if (call == MAT_INITIAL_MATRIX) {
3409         PetscBool sorted;
3410         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3411         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3412         PetscCall(ISGetSize(iscol, &i));
3413         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3414 
3415         PetscCall(ISSorted(iscol_local, &sorted));
3416         if (sorted) {
3417           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3418           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3419           PetscFunctionReturn(0);
3420         }
3421       } else { /* call == MAT_REUSE_MATRIX */
3422         IS iscol_sub;
3423         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3424         if (iscol_sub) {
3425           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3426           PetscFunctionReturn(0);
3427         }
3428       }
3429     }
3430   }
3431 
3432   /* General case: iscol -> iscol_local which has global size of iscol */
3433   if (call == MAT_REUSE_MATRIX) {
3434     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3435     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3436   } else {
3437     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3438   }
3439 
3440   PetscCall(ISGetLocalSize(iscol, &csize));
3441   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3442 
3443   if (call == MAT_INITIAL_MATRIX) {
3444     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3445     PetscCall(ISDestroy(&iscol_local));
3446   }
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 /*@C
3451      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.
3453 
3454    Collective
3455 
3456    Input Parameters:
3457 +  comm - MPI communicator
3458 .  A - "diagonal" portion of matrix
3459 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3460 -  garray - global index of B columns
3461 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
   See `MatCreateAIJ()` for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

   A becomes part of the output mat and B is destroyed by this routine; the user may no longer use A or B.
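
   Example Usage:
   A minimal sketch, assuming the sequential matrices `Aloc` and `Bloc` and the column map `garray`
   have already been assembled consistently on each rank:
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garray, &C));
  /* Aloc and Bloc now belong to C and must not be used or destroyed by the caller */
.ve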
3470 
3471 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3472 @*/
3473 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3474 {
3475   Mat_MPIAIJ        *maij;
3476   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3477   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3478   const PetscScalar *oa;
3479   Mat                Bnew;
3480   PetscInt           m, n, N;
3481   MatType            mpi_mat_type;
3482 
3483   PetscFunctionBegin;
3484   PetscCall(MatCreate(comm, mat));
3485   PetscCall(MatGetSize(A, &m, &n));
3486   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3487   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* The check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3489   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3490 
3491   /* Get global columns of mat */
3492   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3493 
3494   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3495   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3496   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3497   PetscCall(MatSetType(*mat, mpi_mat_type));
3498 
3499   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3500   maij = (Mat_MPIAIJ *)(*mat)->data;
3501 
3502   (*mat)->preallocated = PETSC_TRUE;
3503 
3504   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3505   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3506 
3507   /* Set A as diagonal portion of *mat */
3508   maij->A = A;
3509 
3510   nz = oi[m];
3511   for (i = 0; i < nz; i++) {
3512     col   = oj[i];
3513     oj[i] = garray[col];
3514   }
3515 
3516   /* Set Bnew as off-diagonal portion of *mat */
3517   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3518   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3519   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3520   bnew        = (Mat_SeqAIJ *)Bnew->data;
3521   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3522   maij->B     = Bnew;
3523 
3524   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3525 
3526   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3527   b->free_a       = PETSC_FALSE;
3528   b->free_ij      = PETSC_FALSE;
3529   PetscCall(MatDestroy(&B));
3530 
3531   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3532   bnew->free_a       = PETSC_TRUE;
3533   bnew->free_ij      = PETSC_TRUE;
3534 
3535   /* condense columns of maij->B */
3536   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3537   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3538   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3539   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3540   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3541   PetscFunctionReturn(0);
3542 }
3543 
3544 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3545 
3546 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3547 {
3548   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3549   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3550   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3551   Mat             M, Msub, B = a->B;
3552   MatScalar      *aa;
3553   Mat_SeqAIJ     *aij;
3554   PetscInt       *garray = a->garray, *colsub, Ncols;
3555   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3556   IS              iscol_sub, iscmap;
3557   const PetscInt *is_idx, *cmap;
3558   PetscBool       allcolumns = PETSC_FALSE;
3559   MPI_Comm        comm;
3560 
3561   PetscFunctionBegin;
3562   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3563   if (call == MAT_REUSE_MATRIX) {
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3565     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3566     PetscCall(ISGetLocalSize(iscol_sub, &count));
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3569     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3572     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3573 
3574     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3575 
  } else { /* call == MAT_INITIAL_MATRIX */
3577     PetscBool flg;
3578 
3579     PetscCall(ISGetLocalSize(iscol, &n));
3580     PetscCall(ISGetSize(iscol, &Ncols));
3581 
3582     /* (1) iscol -> nonscalable iscol_local */
3583     /* Check for special case: each processor gets entire matrix columns */
3584     PetscCall(ISIdentity(iscol_local, &flg));
3585     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3586     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3587     if (allcolumns) {
3588       iscol_sub = iscol_local;
3589       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3590       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3591 
3592     } else {
      /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3594       PetscInt *idx, *cmap1, k;
3595       PetscCall(PetscMalloc1(Ncols, &idx));
3596       PetscCall(PetscMalloc1(Ncols, &cmap1));
3597       PetscCall(ISGetIndices(iscol_local, &is_idx));
3598       count = 0;
3599       k     = 0;
3600       for (i = 0; i < Ncols; i++) {
3601         j = is_idx[i];
3602         if (j >= cstart && j < cend) {
3603           /* diagonal part of mat */
3604           idx[count]     = j;
3605           cmap1[count++] = i; /* column index in submat */
3606         } else if (Bn) {
3607           /* off-diagonal part of mat */
3608           if (j == garray[k]) {
3609             idx[count]     = j;
3610             cmap1[count++] = i; /* column index in submat */
3611           } else if (j > garray[k]) {
3612             while (j > garray[k] && k < Bn - 1) k++;
3613             if (j == garray[k]) {
3614               idx[count]     = j;
3615               cmap1[count++] = i; /* column index in submat */
3616             }
3617           }
3618         }
3619       }
3620       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3621 
3622       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3623       PetscCall(ISGetBlockSize(iscol, &cbs));
3624       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3625 
3626       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3627     }
3628 
3629     /* (3) Create sequential Msub */
3630     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3631   }
3632 
3633   PetscCall(ISGetLocalSize(iscol_sub, &count));
3634   aij = (Mat_SeqAIJ *)(Msub)->data;
3635   ii  = aij->i;
3636   PetscCall(ISGetIndices(iscmap, &cmap));
3637 
3638   /*
3639       m - number of local rows
3640       Ncols - number of columns (same on all processors)
3641       rstart - first row in new global matrix generated
3642   */
3643   PetscCall(MatGetSize(Msub, &m, NULL));
3644 
3645   if (call == MAT_INITIAL_MATRIX) {
3646     /* (4) Create parallel newmat */
3647     PetscMPIInt rank, size;
3648     PetscInt    csize;
3649 
3650     PetscCallMPI(MPI_Comm_size(comm, &size));
3651     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3652 
3653     /*
3654         Determine the number of non-zeros in the diagonal and off-diagonal
3655         portions of the matrix in order to do correct preallocation
3656     */
3657 
3658     /* first get start and end of "diagonal" columns */
3659     PetscCall(ISGetLocalSize(iscol, &csize));
3660     if (csize == PETSC_DECIDE) {
3661       PetscCall(ISGetSize(isrow, &mglobal));
3662       if (mglobal == Ncols) { /* square matrix */
3663         nlocal = m;
3664       } else {
3665         nlocal = Ncols / size + ((Ncols % size) > rank);
3666       }
3667     } else {
3668       nlocal = csize;
3669     }
3670     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3671     rstart = rend - nlocal;
3672     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3673 
3674     /* next, compute all the lengths */
3675     jj = aij->j;
3676     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3677     olens = dlens + m;
3678     for (i = 0; i < m; i++) {
3679       jend = ii[i + 1] - ii[i];
3680       olen = 0;
3681       dlen = 0;
3682       for (j = 0; j < jend; j++) {
3683         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3684         else dlen++;
3685         jj++;
3686       }
3687       olens[i] = olen;
3688       dlens[i] = dlen;
3689     }
3690 
3691     PetscCall(ISGetBlockSize(isrow, &bs));
3692     PetscCall(ISGetBlockSize(iscol, &cbs));
3693 
3694     PetscCall(MatCreate(comm, &M));
3695     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3696     PetscCall(MatSetBlockSizes(M, bs, cbs));
3697     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3698     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3699     PetscCall(PetscFree(dlens));
3700 
3701   } else { /* call == MAT_REUSE_MATRIX */
3702     M = *newmat;
3703     PetscCall(MatGetLocalSize(M, &i, NULL));
3704     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3705     PetscCall(MatZeroEntries(M));
3706     /*
3707          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3708        rather than the slower MatSetValues().
3709     */
3710     M->was_assembled = PETSC_TRUE;
3711     M->assembled     = PETSC_FALSE;
3712   }
3713 
3714   /* (5) Set values of Msub to *newmat */
3715   PetscCall(PetscMalloc1(count, &colsub));
3716   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3717 
3718   jj = aij->j;
3719   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3720   for (i = 0; i < m; i++) {
3721     row = rstart + i;
3722     nz  = ii[i + 1] - ii[i];
3723     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3724     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3725     jj += nz;
3726     aa += nz;
3727   }
3728   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3729   PetscCall(ISRestoreIndices(iscmap, &cmap));
3730 
3731   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3732   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3733 
3734   PetscCall(PetscFree(colsub));
3735 
3736   /* save Msub, iscol_sub and iscmap used in processor for next request */
3737   if (call == MAT_INITIAL_MATRIX) {
3738     *newmat = M;
3739     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3740     PetscCall(MatDestroy(&Msub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3743     PetscCall(ISDestroy(&iscol_sub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3746     PetscCall(ISDestroy(&iscmap));
3747 
3748     if (iscol_local) {
3749       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3750       PetscCall(ISDestroy(&iscol_local));
3751     }
3752   }
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 /*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ,
  and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3760 
3761   This requires a sequential iscol with all indices.
3762 */
3763 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3764 {
3765   PetscMPIInt rank, size;
3766   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3767   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3768   Mat         M, Mreuse;
3769   MatScalar  *aa, *vwork;
3770   MPI_Comm    comm;
3771   Mat_SeqAIJ *aij;
3772   PetscBool   colflag, allcolumns = PETSC_FALSE;
3773 
3774   PetscFunctionBegin;
3775   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3776   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3777   PetscCallMPI(MPI_Comm_size(comm, &size));
3778 
3779   /* Check for special case: each processor gets entire matrix columns */
3780   PetscCall(ISIdentity(iscol, &colflag));
3781   PetscCall(ISGetLocalSize(iscol, &n));
3782   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3783   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3784 
3785   if (call == MAT_REUSE_MATRIX) {
3786     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3787     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3788     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3789   } else {
3790     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3791   }
3792 
3793   /*
3794       m - number of local rows
3795       n - number of columns (same on all processors)
3796       rstart - first row in new global matrix generated
3797   */
3798   PetscCall(MatGetSize(Mreuse, &m, &n));
3799   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3800   if (call == MAT_INITIAL_MATRIX) {
3801     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3802     ii  = aij->i;
3803     jj  = aij->j;
3804 
3805     /*
3806         Determine the number of non-zeros in the diagonal and off-diagonal
3807         portions of the matrix in order to do correct preallocation
3808     */
3809 
3810     /* first get start and end of "diagonal" columns */
3811     if (csize == PETSC_DECIDE) {
3812       PetscCall(ISGetSize(isrow, &mglobal));
3813       if (mglobal == n) { /* square matrix */
3814         nlocal = m;
3815       } else {
3816         nlocal = n / size + ((n % size) > rank);
3817       }
3818     } else {
3819       nlocal = csize;
3820     }
3821     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3822     rstart = rend - nlocal;
3823     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3824 
3825     /* next, compute all the lengths */
3826     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3827     olens = dlens + m;
3828     for (i = 0; i < m; i++) {
3829       jend = ii[i + 1] - ii[i];
3830       olen = 0;
3831       dlen = 0;
3832       for (j = 0; j < jend; j++) {
3833         if (*jj < rstart || *jj >= rend) olen++;
3834         else dlen++;
3835         jj++;
3836       }
3837       olens[i] = olen;
3838       dlens[i] = dlen;
3839     }
3840     PetscCall(MatCreate(comm, &M));
3841     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3842     PetscCall(MatSetBlockSizes(M, bs, cbs));
3843     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3844     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3845     PetscCall(PetscFree(dlens));
3846   } else {
3847     PetscInt ml, nl;
3848 
3849     M = *newmat;
3850     PetscCall(MatGetLocalSize(M, &ml, &nl));
3851     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3852     PetscCall(MatZeroEntries(M));
3853     /*
3854          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3855        rather than the slower MatSetValues().
3856     */
3857     M->was_assembled = PETSC_TRUE;
3858     M->assembled     = PETSC_FALSE;
3859   }
3860   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3861   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3862   ii  = aij->i;
3863   jj  = aij->j;
3864 
3865   /* trigger copy to CPU if needed */
3866   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3867   for (i = 0; i < m; i++) {
3868     row   = rstart + i;
3869     nz    = ii[i + 1] - ii[i];
3870     cwork = jj;
3871     jj += nz;
3872     vwork = aa;
3873     aa += nz;
3874     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3875   }
3876   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3877 
3878   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3879   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3880   *newmat = M;
3881 
3882   /* save submatrix used in processor for next request */
3883   if (call == MAT_INITIAL_MATRIX) {
3884     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3885     PetscCall(MatDestroy(&Mreuse));
3886   }
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3891 {
3892   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3893   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3894   const PetscInt *JJ;
3895   PetscBool       nooffprocentries;
3896   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3897 
3898   PetscFunctionBegin;
3899   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3900 
3901   PetscCall(PetscLayoutSetUp(B->rmap));
3902   PetscCall(PetscLayoutSetUp(B->cmap));
3903   m      = B->rmap->n;
3904   cstart = B->cmap->rstart;
3905   cend   = B->cmap->rend;
3906   rstart = B->rmap->rstart;
3907 
3908   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3909 
3910   if (PetscDefined(USE_DEBUG)) {
3911     for (i = 0; i < m; i++) {
3912       nnz = Ii[i + 1] - Ii[i];
3913       JJ  = J + Ii[i];
3914       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3915       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3916       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3917     }
3918   }
3919 
3920   for (i = 0; i < m; i++) {
3921     nnz     = Ii[i + 1] - Ii[i];
3922     JJ      = J + Ii[i];
3923     nnz_max = PetscMax(nnz_max, nnz);
3924     d       = 0;
3925     for (j = 0; j < nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3932   PetscCall(PetscFree2(d_nnz, o_nnz));
3933 
3934   for (i = 0; i < m; i++) {
3935     ii = i + rstart;
3936     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3937   }
3938   nooffprocentries    = B->nooffprocentries;
3939   B->nooffprocentries = PETSC_TRUE;
3940   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3941   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3942   B->nooffprocentries = nooffprocentries;
3943 
3944   /* count number of entries below block diagonal */
3945   PetscCall(PetscFree(Aij->ld));
3946   PetscCall(PetscCalloc1(m, &ld));
3947   Aij->ld = ld;
3948   for (i = 0; i < m; i++) {
3949     nnz = Ii[i + 1] - Ii[i];
3950     j   = 0;
3951     while (j < nnz && J[j] < cstart) j++;
3952     ld[i] = j;
3953     J += nnz;
3954   }
3955 
3956   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3957   PetscFunctionReturn(0);
3958 }
3959 
3960 /*@
3961    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3962    (the default parallel PETSc format).
3963 
3964    Collective
3965 
3966    Input Parameters:
3967 +  B - the matrix
3968 .  i - the indices into j for the start of each local row (starts with zero)
3969 .  j - the column indices for each local row (starts with zero)
3970 -  v - optional values in the matrix
3971 
3972    Level: developer
3973 
3974    Notes:
3975        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3977      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3980 
3981        The format used for the sparse matrix input is equivalent to a
3982     row-major ordering, i.e., for the following matrix the expected input data is
3983     as shown below
3984 
3985 $        1 0 0
3986 $        2 0 3     P0
3987 $       -------
3988 $        4 5 6     P1
3989 $
3990 $     Process0 [P0]: rows_owned=[0,1]
3991 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3992 $        j =  {0,0,2}  [size = 3]
3993 $        v =  {1,2,3}  [size = 3]
3994 $
3995 $     Process1 [P1]: rows_owned=[2]
3996 $        i =  {0,3}    [size = nrow+1  = 1+1]
3997 $        j =  {0,1,2}  [size = 3]
3998 $        v =  {4,5,6}  [size = 3]
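
     A minimal usage sketch for process 0 of the example above (variable names are
     illustrative and error checking is omitted); process 1 would pass its own i, j, and v arrays:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,0,2};        /* global column indices             */
     PetscScalar v[] = {1.0,2.0,3.0};  /* values stored row by row          */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);   /* 2 local rows on process 0 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve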
3999 
4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4001           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4002 @*/
4003 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4004 {
4005   PetscFunctionBegin;
4006   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 /*@C
4011    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e., 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e., 'm'.
4036 
4037    If the *_nnz parameter is given then the *_nz parameter is ignored
4038 
4039    The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See [Sparse Matrices](sec_matsparse) for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extracting the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
4051    this processor, and c1-c2 is the range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4056 
4057    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4058 
4059    You can call `MatGetInfo()` to get information on how effective the preallocation was,
4060    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Here the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, and G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4103    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as one `MATSEQAIJ`
4104    matrix, and [DF] as another `MATSEQAIJ` matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4109    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4110    row of the local DIAGONAL and OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz and o_nz are:
4112 .vb
4113      proc0 : d_nz = 2, o_nz = 2
4114      proc1 : d_nz = 3, o_nz = 2
4115      proc2 : d_nz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4119    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is the sum of all the above values, i.e., 34, and
4131    hence pre-allocation is perfect.
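
   As a minimal sketch of the per-row variant on proc0 of the example above (variable
   names are illustrative and error checking is omitted); the other ranks pass their
   own local sizes and count arrays:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          /* m,n,M,N for proc0       */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve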
4132 
4133    Level: intermediate
4134 
4135 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4136           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4137 @*/
4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4139 {
4140   PetscFunctionBegin;
4141   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4142   PetscValidType(B, 1);
4143   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4144   PetscFunctionReturn(0);
4145 }
4146 
4147 /*@
4148      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4149          rows in standard CSR format.
4150 
4151    Collective
4152 
4153    Input Parameters:
4154 +  comm - MPI communicator
4155 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4156 .  n - This value should be the same as the local size used in creating the
4157        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4158        calculated if N is given). For square matrices n is almost always m.
4159 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4160 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4161 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4162 .   j - column indices
4163 -   a - optional matrix values
4164 
4165    Output Parameter:
4166 .   mat - the matrix
4167 
4168    Level: intermediate
4169 
4170    Notes:
4171        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4172      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4173      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4174 
4175        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4176 
4177        The format used for the sparse matrix input is equivalent to a
4178     row-major ordering, i.e., for the following matrix the expected input data is
4179     as shown below
4180 
4181        Once you have created the matrix, you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`.
4182 
4183 $        1 0 0
4184 $        2 0 3     P0
4185 $       -------
4186 $        4 5 6     P1
4187 $
4188 $     Process0 [P0]: rows_owned=[0,1]
4189 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4190 $        j =  {0,0,2}  [size = 3]
4191 $        v =  {1,2,3}  [size = 3]
4192 $
4193 $     Process1 [P1]: rows_owned=[2]
4194 $        i =  {0,3}    [size = nrow+1  = 1+1]
4195 $        j =  {0,1,2}  [size = 3]
4196 $        v =  {4,5,6}  [size = 3]
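
     As a sketch, process 0 of the example above could create the matrix in a single call
     (illustrative variable names, error checking omitted); process 1 passes its own
     i, j, and v arrays with m = 1:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve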
4197 
4198 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4200 @*/
4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4202 {
4203   PetscFunctionBegin;
4204   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4205   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4206   PetscCall(MatCreate(comm, mat));
4207   PetscCall(MatSetSizes(*mat, m, n, M, N));
4208   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4209   PetscCall(MatSetType(*mat, MATMPIAIJ));
4210   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 /*@
4215      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4216          CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to `MatCreateMPIAIJWithArrays()`
4217 
4218      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4219 
4220    Collective
4221 
4222    Input Parameters:
4223 +  mat - the matrix
4224 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4225 .  n - This value should be the same as the local size used in creating the
4226        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4227        calculated if N is given). For square matrices n is almost always m.
4228 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4229 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4230 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4231 .  J - column indices
4232 -  v - matrix values
4233 
4234    Level: intermediate
4235 
4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4237           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4238 @*/
4239 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4240 {
4241   PetscInt        nnz, i;
4242   PetscBool       nooffprocentries;
4243   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4244   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4245   PetscScalar    *ad, *ao;
4246   PetscInt        ldi, Iii, md;
4247   const PetscInt *Adi = Ad->i;
4248   PetscInt       *ld  = Aij->ld;
4249 
4250   PetscFunctionBegin;
4251   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4252   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4253   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4254   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4255 
4256   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4257   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4258 
4259   for (i = 0; i < m; i++) {
4260     nnz = Ii[i + 1] - Ii[i];
4261     Iii = Ii[i];
4262     ldi = ld[i];
4263     md  = Adi[i + 1] - Adi[i];
4264     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4265     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4266     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4267     ad += md;
4268     ao += nnz - md;
4269   }
4270   nooffprocentries      = mat->nooffprocentries;
4271   mat->nooffprocentries = PETSC_TRUE;
4272   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4273   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4274   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4275   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4276   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4277   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4278   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4279   mat->nooffprocentries = nooffprocentries;
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 /*@
4284      MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 -  v - matrix values, stored by row
4291 
4292    Level: intermediate
4293 
4294    Note:
4295    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
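
   A minimal sketch for a rank whose local part has three nonzeros (A and vnew are
   illustrative; vnew holds the new values in the same row-by-row CSR order used at creation):

.vb
     PetscScalar vnew[] = {10.0,20.0,30.0};   /* same layout as the original values */

     MatUpdateMPIAIJWithArray(A,vnew);
.ve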
4296 
4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4298           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4299 @*/
4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4301 {
4302   PetscInt        nnz, i, m;
4303   PetscBool       nooffprocentries;
4304   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4305   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4306   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4307   PetscScalar    *ad, *ao;
4308   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4309   PetscInt        ldi, Iii, md;
4310   PetscInt       *ld = Aij->ld;
4311 
4312   PetscFunctionBegin;
4313   m = mat->rmap->n;
4314 
4315   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4316   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4317   Iii = 0;
4318   for (i = 0; i < m; i++) {
4319     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4320     ldi = ld[i];
4321     md  = Adi[i + 1] - Adi[i];
4322     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4323     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4324     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4325     ad += md;
4326     ao += nnz - md;
4327     Iii += nnz;
4328   }
4329   nooffprocentries      = mat->nooffprocentries;
4330   mat->nooffprocentries = PETSC_TRUE;
4331   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4332   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4333   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4334   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4335   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4336   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4337   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4338   mat->nooffprocentries = nooffprocentries;
4339   PetscFunctionReturn(0);
4340 }
4341 
4342 /*@C
4343    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4344    (the default parallel PETSc format).  For good matrix assembly performance
4345    the user should preallocate the matrix storage by setting the parameters
4346    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4347    performance can be increased by more than a factor of 50.
4348 
4349    Collective
4350 
4351    Input Parameters:
4352 +  comm - MPI communicator
4353 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4354            This value should be the same as the local size used in creating the
4355            y vector for the matrix-vector product y = Ax.
4356 .  n - This value should be the same as the local size used in creating the
4357        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4358        calculated if N is given). For square matrices n is almost always m.
4359 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4360 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4362            (same value is used for all local rows)
4363 .  d_nnz - array containing the number of nonzeros in the various rows of the
4364            DIAGONAL portion of the local submatrix (possibly different for each row)
4365            or NULL, if d_nz is used to specify the nonzero structure.
4366            The size of this array is equal to the number of local rows, i.e., 'm'.
4367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4368            submatrix (same value is used for all local rows).
4369 -  o_nnz - array containing the number of nonzeros in the various rows of the
4370            OFF-DIAGONAL portion of the local submatrix (possibly different for
4371            each row) or NULL, if o_nz is used to specify the nonzero
4372            structure. The size of this array is equal to the number
4373            of local rows, i.e., 'm'.
4374 
4375    Output Parameter:
4376 .  A - the matrix
4377 
4378    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4379    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4380    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4381 
4382    Notes:
4383    If the *_nnz parameter is given then the *_nz parameter is ignored
4384 
4385    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4386    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4387    storage requirements for this matrix.
4388 
4389    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4390    processor then it must be used on all processors that share the object for
4391    that argument.
4392 
4393    The user MUST specify either the local or global matrix dimensions
4394    (possibly both).
4395 
4396    The parallel matrix is partitioned across processors such that the
4397    first m0 rows belong to process 0, the next m1 rows belong to
4398    process 1, the next m2 rows belong to process 2, etc., where
4399    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4400    values corresponding to an [m x N] submatrix.
4401 
4402    The columns are logically partitioned with the n0 columns belonging
4403    to the 0th partition, the next n1 columns belonging to the next
4404    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4405 
4406    The DIAGONAL portion of the local submatrix on any given processor
4407    is the submatrix corresponding to the rows and columns m,n
4408    owned by the given processor, i.e., the diagonal matrix on
4409    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4410    etc. The remaining portion of the local submatrix [m x (N-n)]
4411    constitutes the OFF-DIAGONAL portion. The example below
4412    illustrates this concept further.
4413 
4414    For a square global matrix we define each processor's diagonal portion
4415    to be its local rows and the corresponding columns (a square submatrix);
4416    each processor's off-diagonal portion encompasses the remainder of the
4417    local matrix (a rectangular submatrix).
4418 
4419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4420 
4421    When calling this routine with a single process communicator, a matrix of
4422    type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4423    type of communicator, use the construction mechanism
4424 .vb
4425      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4426 .ve
4432 
4433    By default, this format uses inodes (identical nodes) when possible.
4434    We search for consecutive rows with the same nonzero structure, thereby
4435    reusing matrix information to achieve increased efficiency.
4436 
4437    Options Database Keys:
4438 +  -mat_no_inode  - Do not use inodes
4439 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4440 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4441         See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw, or binary cause the vecscatter to be viewed as a matrix.
4442         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4443 
4444    Example usage:
4445 
4446    Consider the following 8x8 matrix with 34 non-zero values, that is
4447    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4448    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4449    as follows
4450 
4451 .vb
4452             1  2  0  |  0  3  0  |  0  4
4453     Proc0   0  5  6  |  7  0  0  |  8  0
4454             9  0 10  | 11  0  0  | 12  0
4455     -------------------------------------
4456            13  0 14  | 15 16 17  |  0  0
4457     Proc1   0 18  0  | 19 20 21  |  0  0
4458             0  0  0  | 22 23  0  | 24  0
4459     -------------------------------------
4460     Proc2  25 26 27  |  0  0 28  | 29  0
4461            30  0  0  | 31 32 33  |  0 34
4462 .ve
4463 
4464    This can be represented as a collection of submatrices as
4465 
4466 .vb
4467       A B C
4468       D E F
4469       G H I
4470 .ve
4471 
4472    Here the submatrices A,B,C are owned by proc0, D,E,F are
4473    owned by proc1, and G,H,I are owned by proc2.
4474 
4475    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4476    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4477    The 'M','N' parameters are 8,8, and have the same values on all procs.
4478 
4479    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4480    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4481    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4482    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4483    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as one `MATSEQAIJ`
4484    matrix, and [DF] as another `MATSEQAIJ` matrix.
4485 
4486    When d_nz, o_nz parameters are specified, d_nz storage elements are
4487    allocated for every row of the local diagonal submatrix, and o_nz
4488    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4489    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4490    row of the local DIAGONAL and OFF-DIAGONAL submatrices.
4491    In this case, the values of d_nz and o_nz are
4492 .vb
4493      proc0 : d_nz = 2, o_nz = 2
4494      proc1 : d_nz = 3, o_nz = 2
4495      proc2 : d_nz = 1, o_nz = 4
4496 .ve
4497    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4498    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4499    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4500    34 values.
4501 
4502    When d_nnz, o_nnz parameters are specified, the storage is specified
4503    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4504    In the above case the values for d_nnz,o_nnz are
4505 .vb
4506      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4507      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4508      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4509 .ve
4510    Here the space allocated is the sum of all the above values, i.e., 34, and
4511    hence pre-allocation is perfect.
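
   As a sketch, proc0 of the example above could create the whole parallel matrix with
   per-row counts in one call (illustrative names, error checking omitted); the other
   ranks pass their own m, n, d_nnz, and o_nnz:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve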
4512 
4513    Level: intermediate
4514 
4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(0);
4534 }
4535 
4536 /*MC
4537     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4538 
4539     Synopsis:
4540     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4541 
4542     Not Collective
4543 
4544     Input Parameter:
4545 .   A - the `MATMPIAIJ` matrix
4546 
4547     Output Parameters:
4548 +   Ad - the diagonal portion of the matrix
4549 .   Ao - the off diagonal portion of the matrix
4550 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4551 -   ierr - error code
4552 
4553      Level: advanced
4554 
4555     Note:
4556     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4557 
4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4559 M*/
4560 
4561 /*MC
4562     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4563 
4564     Synopsis:
4565     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameters:
4570 +   A - the `MATMPIAIJ` matrix
4571 .   Ad - the diagonal portion of the matrix
4572 .   Ao - the off diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4579 M*/
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4583 
4584   Not collective
4585 
4586   Input Parameter:
4587 . A - The `MATMPIAIJ` matrix
4588 
4589   Output Parameters:
4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4593 
4594   Note:
4595   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4596   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4597   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4598   local column numbers to global column numbers in the original matrix.
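
  A minimal access sketch (illustrative, error checking omitted); a local column c of
  Ao corresponds to global column colmap[c] of A:

.vb
     Mat             Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve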
4599 
4600   Level: intermediate
4601 
4602 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4603 @*/
4604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4605 {
4606   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4607   PetscBool   flg;
4608 
4609   PetscFunctionBegin;
4610   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4611   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4612   if (Ad) *Ad = a->A;
4613   if (Ao) *Ao = a->B;
4614   if (colmap) *colmap = a->garray;
4615   PetscFunctionReturn(0);
4616 }
4617 
4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4619 {
4620   PetscInt     m, N, i, rstart, nnz, Ii;
4621   PetscInt    *indx;
4622   PetscScalar *values;
4623   MatType      rootType;
4624 
4625   PetscFunctionBegin;
4626   PetscCall(MatGetSize(inmat, &m, &N));
4627   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4628     PetscInt *dnz, *onz, sum, bs, cbs;
4629 
4630     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4631     /* Check sum(n) = N */
4632     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4633     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4634 
4635     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4636     rstart -= m;
4637 
4638     MatPreallocateBegin(comm, m, n, dnz, onz);
4639     for (i = 0; i < m; i++) {
4640       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4641       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4642       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4643     }
4644 
4645     PetscCall(MatCreate(comm, outmat));
4646     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4647     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4648     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4649     PetscCall(MatGetRootType_Private(inmat, &rootType));
4650     PetscCall(MatSetType(*outmat, rootType));
4651     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4652     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4653     MatPreallocateEnd(dnz, onz);
4654     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4655   }
4656 
4657   /* numeric phase */
4658   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4659   for (i = 0; i < m; i++) {
4660     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4661     Ii = i + rstart;
4662     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4663     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4664   }
4665   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4666   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4667   PetscFunctionReturn(0);
4668 }
4669 
4670 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4671 {
4672   PetscMPIInt        rank;
4673   PetscInt           m, N, i, rstart, nnz;
4674   size_t             len;
4675   const PetscInt    *indx;
4676   PetscViewer        out;
4677   char              *name;
4678   Mat                B;
4679   const PetscScalar *values;
4680 
4681   PetscFunctionBegin;
4682   PetscCall(MatGetLocalSize(A, &m, NULL));
4683   PetscCall(MatGetSize(A, NULL, &N));
4684   /* Should this be the type of the diagonal block of A? */
4685   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4686   PetscCall(MatSetSizes(B, m, N, m, N));
4687   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4688   PetscCall(MatSetType(B, MATSEQAIJ));
4689   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4690   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4691   for (i = 0; i < m; i++) {
4692     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4693     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4694     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4695   }
4696   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4697   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4698 
4699   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4700   PetscCall(PetscStrlen(outfile, &len));
4701   PetscCall(PetscMalloc1(len + 6, &name));
4702   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4703   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4704   PetscCall(PetscFree(name));
4705   PetscCall(MatView(B, out));
4706   PetscCall(PetscViewerDestroy(&out));
4707   PetscCall(MatDestroy(&B));
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4712 {
4713   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4714 
4715   PetscFunctionBegin;
4716   if (!merge) PetscFunctionReturn(0);
4717   PetscCall(PetscFree(merge->id_r));
4718   PetscCall(PetscFree(merge->len_s));
4719   PetscCall(PetscFree(merge->len_r));
4720   PetscCall(PetscFree(merge->bi));
4721   PetscCall(PetscFree(merge->bj));
4722   PetscCall(PetscFree(merge->buf_ri[0]));
4723   PetscCall(PetscFree(merge->buf_ri));
4724   PetscCall(PetscFree(merge->buf_rj[0]));
4725   PetscCall(PetscFree(merge->buf_rj));
4726   PetscCall(PetscFree(merge->coi));
4727   PetscCall(PetscFree(merge->coj));
4728   PetscCall(PetscFree(merge->owners_co));
4729   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4730   PetscCall(PetscFree(merge));
4731   PetscFunctionReturn(0);
4732 }
4733 
4734 #include <../src/mat/utils/freespace.h>
4735 #include <petscbt.h>
4736 
4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4738 {
4739   MPI_Comm             comm;
4740   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4741   PetscMPIInt          size, rank, taga, *len_s;
4742   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4743   PetscInt             proc, m;
4744   PetscInt           **buf_ri, **buf_rj;
4745   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4746   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4747   MPI_Request         *s_waits, *r_waits;
4748   MPI_Status          *status;
4749   const MatScalar     *aa, *a_a;
4750   MatScalar          **abuf_r, *ba_i;
4751   Mat_Merge_SeqsToMPI *merge;
4752   PetscContainer       container;
4753 
4754   PetscFunctionBegin;
4755   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4756   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4757 
4758   PetscCallMPI(MPI_Comm_size(comm, &size));
4759   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4760 
4761   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4762   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4763   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4764   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4765   aa = a_a;
4766 
4767   bi     = merge->bi;
4768   bj     = merge->bj;
4769   buf_ri = merge->buf_ri;
4770   buf_rj = merge->buf_rj;
4771 
4772   PetscCall(PetscMalloc1(size, &status));
4773   owners = merge->rowmap->range;
4774   len_s  = merge->len_s;
4775 
4776   /* send and recv matrix values */
4777   /*-----------------------------*/
4778   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4779   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4780 
4781   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4782   for (proc = 0, k = 0; proc < size; proc++) {
4783     if (!len_s[proc]) continue;
4784     i = owners[proc];
4785     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4786     k++;
4787   }
4788 
4789   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4790   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4791   PetscCall(PetscFree(status));
4792 
4793   PetscCall(PetscFree(s_waits));
4794   PetscCall(PetscFree(r_waits));
4795 
4796   /* insert mat values of mpimat */
4797   /*----------------------------*/
4798   PetscCall(PetscMalloc1(N, &ba_i));
4799   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4800 
4801   for (k = 0; k < merge->nrecv; k++) {
4802     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4803     nrows       = *(buf_ri_k[k]);
4804     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4805     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4806   }
4807 
4808   /* set values of ba */
4809   m = merge->rowmap->n;
4810   for (i = 0; i < m; i++) {
4811     arow = owners[rank] + i;
4812     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4813     bnzi = bi[i + 1] - bi[i];
4814     PetscCall(PetscArrayzero(ba_i, bnzi));
4815 
4816     /* add local non-zero vals of this proc's seqmat into ba */
4817     anzi   = ai[arow + 1] - ai[arow];
4818     aj     = a->j + ai[arow];
4819     aa     = a_a + ai[arow];
4820     nextaj = 0;
4821     for (j = 0; nextaj < anzi; j++) {
4822       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4823         ba_i[j] += aa[nextaj++];
4824       }
4825     }
4826 
4827     /* add received vals into ba */
4828     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4829       /* i-th row */
4830       if (i == *nextrow[k]) {
4831         anzi   = *(nextai[k] + 1) - *nextai[k];
4832         aj     = buf_rj[k] + *(nextai[k]);
4833         aa     = abuf_r[k] + *(nextai[k]);
4834         nextaj = 0;
4835         for (j = 0; nextaj < anzi; j++) {
4836           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4837             ba_i[j] += aa[nextaj++];
4838           }
4839         }
4840         nextrow[k]++;
4841         nextai[k]++;
4842       }
4843     }
4844     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4845   }
4846   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4847   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4848   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4849 
4850   PetscCall(PetscFree(abuf_r[0]));
4851   PetscCall(PetscFree(abuf_r));
4852   PetscCall(PetscFree(ba_i));
4853   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4854   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4855   PetscFunctionReturn(0);
4856 }
4857 
4858 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4859 {
4860   Mat                  B_mpi;
4861   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4862   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4863   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4864   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4865   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4866   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4867   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4868   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4869   MPI_Status          *status;
4870   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4871   PetscBT              lnkbt;
4872   Mat_Merge_SeqsToMPI *merge;
4873   PetscContainer       container;
4874 
4875   PetscFunctionBegin;
4876   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4877 
4878   /* make sure it is a PETSc comm */
4879   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4880   PetscCallMPI(MPI_Comm_size(comm, &size));
4881   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4882 
4883   PetscCall(PetscNew(&merge));
4884   PetscCall(PetscMalloc1(size, &status));
4885 
4886   /* determine row ownership */
4887   /*---------------------------------------------------------*/
4888   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4889   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4890   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4891   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4892   PetscCall(PetscLayoutSetUp(merge->rowmap));
4893   PetscCall(PetscMalloc1(size, &len_si));
4894   PetscCall(PetscMalloc1(size, &merge->len_s));
4895 
4896   m      = merge->rowmap->n;
4897   owners = merge->rowmap->range;
4898 
4899   /* determine the number of messages to send, their lengths */
4900   /*---------------------------------------------------------*/
4901   len_s = merge->len_s;
4902 
4903   len          = 0; /* length of buf_si[] */
4904   merge->nsend = 0;
4905   for (proc = 0; proc < size; proc++) {
4906     len_si[proc] = 0;
4907     if (proc == rank) {
4908       len_s[proc] = 0;
4909     } else {
4910       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4911       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4912     }
4913     if (len_s[proc]) {
4914       merge->nsend++;
4915       nrows = 0;
4916       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4917         if (ai[i + 1] > ai[i]) nrows++;
4918       }
4919       len_si[proc] = 2 * (nrows + 1);
4920       len += len_si[proc];
4921     }
4922   }
4923 
4924   /* determine the number and length of messages to receive for ij-structure */
4925   /*-------------------------------------------------------------------------*/
4926   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4927   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4928 
4929   /* post the Irecv of j-structure */
4930   /*-------------------------------*/
4931   PetscCall(PetscCommGetNewTag(comm, &tagj));
4932   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4933 
4934   /* post the Isend of j-structure */
4935   /*--------------------------------*/
4936   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4937 
4938   for (proc = 0, k = 0; proc < size; proc++) {
4939     if (!len_s[proc]) continue;
4940     i = owners[proc];
4941     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4942     k++;
4943   }
4944 
4945   /* receives and sends of j-structure are complete */
4946   /*------------------------------------------------*/
4947   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4948   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4949 
4950   /* send and recv i-structure */
4951   /*---------------------------*/
4952   PetscCall(PetscCommGetNewTag(comm, &tagi));
4953   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4954 
4955   PetscCall(PetscMalloc1(len + 1, &buf_s));
4956   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4957   for (proc = 0, k = 0; proc < size; proc++) {
4958     if (!len_s[proc]) continue;
4959     /* form outgoing message for i-structure:
4960          buf_si[0]:                 nrows to be sent
4961                [1:nrows]:           row index (global)
4962                [nrows+1:2*nrows+1]: i-structure index
4963     */
4964     /*-------------------------------------------*/
4965     nrows       = len_si[proc] / 2 - 1;
4966     buf_si_i    = buf_si + nrows + 1;
4967     buf_si[0]   = nrows;
4968     buf_si_i[0] = 0;
4969     nrows       = 0;
4970     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4971       anzi = ai[i + 1] - ai[i];
4972       if (anzi) {
4973         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4974         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4975         nrows++;
4976       }
4977     }
4978     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4979     k++;
4980     buf_si += len_si[proc];
4981   }
4982 
4983   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4984   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4985 
4986   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4987   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4988 
4989   PetscCall(PetscFree(len_si));
4990   PetscCall(PetscFree(len_ri));
4991   PetscCall(PetscFree(rj_waits));
4992   PetscCall(PetscFree2(si_waits, sj_waits));
4993   PetscCall(PetscFree(ri_waits));
4994   PetscCall(PetscFree(buf_s));
4995   PetscCall(PetscFree(status));
4996 
4997   /* compute a local seq matrix in each processor */
4998   /*----------------------------------------------*/
4999   /* allocate bi array and free space for accumulating nonzero column info */
5000   PetscCall(PetscMalloc1(m + 1, &bi));
5001   bi[0] = 0;
5002 
5003   /* create and initialize a linked list */
5004   nlnk = N + 1;
5005   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5006 
5007   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5008   len = ai[owners[rank + 1]] - ai[owners[rank]];
5009   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5010 
5011   current_space = free_space;
5012 
5013   /* determine symbolic info for each local row */
5014   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5015 
5016   for (k = 0; k < merge->nrecv; k++) {
5017     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5018     nrows       = *buf_ri_k[k];
5019     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5020     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5021   }
5022 
5023   MatPreallocateBegin(comm, m, n, dnz, onz);
5024   len = 0;
5025   for (i = 0; i < m; i++) {
5026     bnzi = 0;
5027     /* add local non-zero cols of this proc's seqmat into lnk */
5028     arow = owners[rank] + i;
5029     anzi = ai[arow + 1] - ai[arow];
5030     aj   = a->j + ai[arow];
5031     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5032     bnzi += nlnk;
5033     /* add received col data into lnk */
5034     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5035       if (i == *nextrow[k]) {            /* i-th row */
5036         anzi = *(nextai[k] + 1) - *nextai[k];
5037         aj   = buf_rj[k] + *nextai[k];
5038         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5039         bnzi += nlnk;
5040         nextrow[k]++;
5041         nextai[k]++;
5042       }
5043     }
5044     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5045 
5046     /* if free space is not available, make more free space */
5047     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5048     /* copy data into free space, then initialize lnk */
5049     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5050     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5051 
5052     current_space->array += bnzi;
5053     current_space->local_used += bnzi;
5054     current_space->local_remaining -= bnzi;
5055 
5056     bi[i + 1] = bi[i] + bnzi;
5057   }
5058 
5059   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5060 
5061   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5062   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5063   PetscCall(PetscLLDestroy(lnk, lnkbt));
5064 
5065   /* create symbolic parallel matrix B_mpi */
5066   /*---------------------------------------*/
5067   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5068   PetscCall(MatCreate(comm, &B_mpi));
5069   if (n == PETSC_DECIDE) {
5070     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5071   } else {
5072     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5073   }
5074   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5075   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5076   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5077   MatPreallocateEnd(dnz, onz);
5078   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5079 
5080   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5081   B_mpi->assembled = PETSC_FALSE;
5082   merge->bi        = bi;
5083   merge->bj        = bj;
5084   merge->buf_ri    = buf_ri;
5085   merge->buf_rj    = buf_rj;
5086   merge->coi       = NULL;
5087   merge->coj       = NULL;
5088   merge->owners_co = NULL;
5089 
5090   PetscCall(PetscCommDestroy(&comm));
5091 
5092   /* attach the supporting struct to B_mpi for reuse */
5093   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5094   PetscCall(PetscContainerSetPointer(container, merge));
5095   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5096   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5097   PetscCall(PetscContainerDestroy(&container));
5098   *mpimat = B_mpi;
5099 
5100   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5101   PetscFunctionReturn(0);
5102 }
5103 
5104 /*@C
5105       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5106                  matrices from each processor
5107 
5108     Collective
5109 
5110    Input Parameters:
5111 +    comm - the communicator the parallel matrix will live on
5112 .    seqmat - the input sequential matrix
5113 .    m - number of local rows (or `PETSC_DECIDE`)
5114 .    n - number of local columns (or `PETSC_DECIDE`)
5115 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5116 
5117    Output Parameter:
5118 .    mpimat - the parallel matrix generated
5119 
5120     Level: advanced
5121 
5122    Note:
5123      The dimensions of the sequential matrix MUST be the same on every process.
5124      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5125      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5126 @*/
5127 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5128 {
5129   PetscMPIInt size;
5130 
5131   PetscFunctionBegin;
5132   PetscCallMPI(MPI_Comm_size(comm, &size));
5133   if (size == 1) {
5134     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5135     if (scall == MAT_INITIAL_MATRIX) {
5136       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5137     } else {
5138       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5139     }
5140     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5141     PetscFunctionReturn(0);
5142   }
5143   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5144   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5145   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5146   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5147   PetscFunctionReturn(0);
5148 }
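
/*
   A minimal usage sketch (illustrative only, not part of the library source). It assumes every MPI
   rank already holds an assembled MATSEQAIJ matrix "Aseq" of identical dimensions; the names Aseq
   and Ampi are hypothetical placeholders.

     Mat Ampi;
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Ampi));
     ... change the numerical values of Aseq, keeping its nonzero pattern ...
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &Ampi));
     PetscCall(MatDestroy(&Ampi));
*/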
5149 
5150 /*@
5151      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5152           mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5153           with `MatGetSize()`
5154 
5155     Not Collective
5156 
5157    Input Parameter:
5158 .    A - the matrix
5160 
5161    Output Parameter:
5162 .    A_loc - the local sequential matrix generated
5163 
5164     Level: developer
5165 
5166    Notes:
5167      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5168 
5169      Destroy the matrix with `MatDestroy()`
5170 
5171 .seealso: `MatMPIAIJGetLocalMat()`
5172 @*/
5173 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5174 {
5175   PetscBool mpi;
5176 
5177   PetscFunctionBegin;
5178   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5179   if (mpi) {
5180     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5181   } else {
5182     *A_loc = A;
5183     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5184   }
5185   PetscFunctionReturn(0);
5186 }
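
/*
   A minimal usage sketch (illustrative only): "A" is any assembled AIJ matrix. The caller is
   responsible for destroying the returned local matrix, as noted in the manual page above.

     Mat A_loc;
     PetscCall(MatAIJGetLocalMat(A, &A_loc));
     ... use A_loc as a sequential matrix ...
     PetscCall(MatDestroy(&A_loc));
*/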
5187 
5188 /*@
5189      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5190           mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5191           with `MatGetSize()`
5192 
5193     Not Collective
5194 
5195    Input Parameters:
5196 +    A - the matrix
5197 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5198 
5199    Output Parameter:
5200 .    A_loc - the local sequential matrix generated
5201 
5202     Level: developer
5203 
5204    Notes:
5205      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5206 
5207      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5208      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5209      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5210      modify the values of the returned A_loc.
5211 
5212 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5213 @*/
5214 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5215 {
5216   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5217   Mat_SeqAIJ        *mat, *a, *b;
5218   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5219   const PetscScalar *aa, *ba, *aav, *bav;
5220   PetscScalar       *ca, *cam;
5221   PetscMPIInt        size;
5222   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5223   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5224   PetscBool          match;
5225 
5226   PetscFunctionBegin;
5227   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5228   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5229   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5230   if (size == 1) {
5231     if (scall == MAT_INITIAL_MATRIX) {
5232       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5233       *A_loc = mpimat->A;
5234     } else if (scall == MAT_REUSE_MATRIX) {
5235       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5236     }
5237     PetscFunctionReturn(0);
5238   }
5239 
5240   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5241   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5242   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5243   ai = a->i;
5244   aj = a->j;
5245   bi = b->i;
5246   bj = b->j;
5247   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5248   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5249   aa = aav;
5250   ba = bav;
5251   if (scall == MAT_INITIAL_MATRIX) {
5252     PetscCall(PetscMalloc1(1 + am, &ci));
5253     ci[0] = 0;
5254     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5255     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5256     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5257     k = 0;
5258     for (i = 0; i < am; i++) {
5259       ncols_o = bi[i + 1] - bi[i];
5260       ncols_d = ai[i + 1] - ai[i];
5261       /* off-diagonal portion of A */
5262       for (jo = 0; jo < ncols_o; jo++) {
5263         col = cmap[*bj];
5264         if (col >= cstart) break;
5265         cj[k] = col;
5266         bj++;
5267         ca[k++] = *ba++;
5268       }
5269       /* diagonal portion of A */
5270       for (j = 0; j < ncols_d; j++) {
5271         cj[k]   = cstart + *aj++;
5272         ca[k++] = *aa++;
5273       }
5274       /* off-diagonal portion of A */
5275       for (j = jo; j < ncols_o; j++) {
5276         cj[k]   = cmap[*bj++];
5277         ca[k++] = *ba++;
5278       }
5279     }
5280     /* put together the new matrix */
5281     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5282     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5283     /* Since these are PETSc arrays, change flags to free them as necessary. */
5284     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5285     mat->free_a  = PETSC_TRUE;
5286     mat->free_ij = PETSC_TRUE;
5287     mat->nonew   = 0;
5288   } else if (scall == MAT_REUSE_MATRIX) {
5289     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5290     ci  = mat->i;
5291     cj  = mat->j;
5292     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5293     for (i = 0; i < am; i++) {
5294       /* off-diagonal portion of A */
5295       ncols_o = bi[i + 1] - bi[i];
5296       for (jo = 0; jo < ncols_o; jo++) {
5297         col = cmap[*bj];
5298         if (col >= cstart) break;
5299         *cam++ = *ba++;
5300         bj++;
5301       }
5302       /* diagonal portion of A */
5303       ncols_d = ai[i + 1] - ai[i];
5304       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5305       /* off-diagonal portion of A */
5306       for (j = jo; j < ncols_o; j++) {
5307         *cam++ = *ba++;
5308         bj++;
5309       }
5310     }
5311     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5312   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5313   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5314   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5315   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5316   PetscFunctionReturn(0);
5317 }
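
/*
   A minimal usage sketch (illustrative only): "A" is an assembled MATMPIAIJ matrix; A_loc is a
   hypothetical local variable. The first call builds the local matrix, the second refreshes its
   values after A has been modified without changing its nonzero pattern.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
     ... change numerical values of A and reassemble ...
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
     PetscCall(MatDestroy(&A_loc));
*/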
5318 
5319 /*@
5320      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5321           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5322 
5323     Not Collective
5324 
5325    Input Parameters:
5326 +    A - the matrix
5327 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5328 
5329    Output Parameters:
5330 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5331 -    A_loc - the local sequential matrix generated
5332 
5333     Level: developer
5334 
5335    Note:
5336      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5337 
5338 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5339 @*/
5340 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5341 {
5342   Mat             Ao, Ad;
5343   const PetscInt *cmap;
5344   PetscMPIInt     size;
5345   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5346 
5347   PetscFunctionBegin;
5348   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5349   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5350   if (size == 1) {
5351     if (scall == MAT_INITIAL_MATRIX) {
5352       PetscCall(PetscObjectReference((PetscObject)Ad));
5353       *A_loc = Ad;
5354     } else if (scall == MAT_REUSE_MATRIX) {
5355       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5356     }
5357     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5358     PetscFunctionReturn(0);
5359   }
5360   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5361   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5362   if (f) {
5363     PetscCall((*f)(A, scall, glob, A_loc));
5364   } else {
5365     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5366     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5367     Mat_SeqAIJ        *c;
5368     PetscInt          *ai = a->i, *aj = a->j;
5369     PetscInt          *bi = b->i, *bj = b->j;
5370     PetscInt          *ci, *cj;
5371     const PetscScalar *aa, *ba;
5372     PetscScalar       *ca;
5373     PetscInt           i, j, am, dn, on;
5374 
5375     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5376     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5377     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5378     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5379     if (scall == MAT_INITIAL_MATRIX) {
5380       PetscInt k;
5381       PetscCall(PetscMalloc1(1 + am, &ci));
5382       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5383       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5384       ci[0] = 0;
5385       for (i = 0, k = 0; i < am; i++) {
5386         const PetscInt ncols_o = bi[i + 1] - bi[i];
5387         const PetscInt ncols_d = ai[i + 1] - ai[i];
5388         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5389         /* diagonal portion of A */
5390         for (j = 0; j < ncols_d; j++, k++) {
5391           cj[k] = *aj++;
5392           ca[k] = *aa++;
5393         }
5394         /* off-diagonal portion of A */
5395         for (j = 0; j < ncols_o; j++, k++) {
5396           cj[k] = dn + *bj++;
5397           ca[k] = *ba++;
5398         }
5399       }
5400       /* put together the new matrix */
5401       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5402       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5403       /* Since these are PETSc arrays, change flags to free them as necessary. */
5404       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5405       c->free_a  = PETSC_TRUE;
5406       c->free_ij = PETSC_TRUE;
5407       c->nonew   = 0;
5408       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5409     } else if (scall == MAT_REUSE_MATRIX) {
5410       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5411       for (i = 0; i < am; i++) {
5412         const PetscInt ncols_d = ai[i + 1] - ai[i];
5413         const PetscInt ncols_o = bi[i + 1] - bi[i];
5414         /* diagonal portion of A */
5415         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5416         /* off-diagonal portion of A */
5417         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5418       }
5419       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5420     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5421     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5422     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5423     if (glob) {
5424       PetscInt cst, *gidx;
5425 
5426       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5427       PetscCall(PetscMalloc1(dn + on, &gidx));
5428       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5429       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5430       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5431     }
5432   }
5433   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5434   PetscFunctionReturn(0);
5435 }
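
/*
   A minimal usage sketch (illustrative only): "A" is an assembled MATMPIAIJ matrix. The returned
   IS "glob" maps the columns of the concatenated local matrix (diagonal part first, then
   off-diagonal part) back to global column indices.

     Mat A_loc;
     IS  glob;
     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
     ... work with A_loc and glob ...
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/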
5436 
5437 /*@C
5438      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5439 
5440     Not Collective
5441 
5442    Input Parameters:
5443 +    A - the matrix
5444 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5445 -    row, col - index sets of rows and columns to extract (or NULL)
5446 
5447    Output Parameter:
5448 .    A_loc - the local sequential matrix generated
5449 
5450     Level: developer
5451 
5452 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5453 @*/
5454 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5455 {
5456   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5457   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5458   IS          isrowa, iscola;
5459   Mat        *aloc;
5460   PetscBool   match;
5461 
5462   PetscFunctionBegin;
5463   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5464   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5465   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5466   if (!row) {
5467     start = A->rmap->rstart;
5468     end   = A->rmap->rend;
5469     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5470   } else {
5471     isrowa = *row;
5472   }
5473   if (!col) {
5474     start = A->cmap->rstart;
5475     cmap  = a->garray;
5476     nzA   = a->A->cmap->n;
5477     nzB   = a->B->cmap->n;
5478     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5479     ncols = 0;
5480     for (i = 0; i < nzB; i++) {
5481       if (cmap[i] < start) idx[ncols++] = cmap[i];
5482       else break;
5483     }
5484     imark = i;
5485     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5486     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5487     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5488   } else {
5489     iscola = *col;
5490   }
5491   if (scall != MAT_INITIAL_MATRIX) {
5492     PetscCall(PetscMalloc1(1, &aloc));
5493     aloc[0] = *A_loc;
5494   }
5495   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5496   if (!col) { /* attach global id of condensed columns */
5497     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5498   }
5499   *A_loc = aloc[0];
5500   PetscCall(PetscFree(aloc));
5501   if (!row) PetscCall(ISDestroy(&isrowa));
5502   if (!col) PetscCall(ISDestroy(&iscola));
5503   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5504   PetscFunctionReturn(0);
5505 }
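
/*
   A minimal usage sketch (illustrative only): passing NULL for both index sets condenses onto the
   locally owned rows and the nonzero columns of A.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
     ... use A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/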
5506 
5507 /*
5508  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5509  * A row could be local or remote. The routine is designed to be memory scalable, so that nothing depends
5510  * on a global size.
5511  * */
5512 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5513 {
5514   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5515   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5516   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5517   PetscMPIInt            owner;
5518   PetscSFNode           *iremote, *oiremote;
5519   const PetscInt        *lrowindices;
5520   PetscSF                sf, osf;
5521   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5522   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5523   MPI_Comm               comm;
5524   ISLocalToGlobalMapping mapping;
5525   const PetscScalar     *pd_a, *po_a;
5526 
5527   PetscFunctionBegin;
5528   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5529   /* plocalsize is the number of roots
5530    * nrows is the number of leaves
5531    * */
5532   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5533   PetscCall(ISGetLocalSize(rows, &nrows));
5534   PetscCall(PetscCalloc1(nrows, &iremote));
5535   PetscCall(ISGetIndices(rows, &lrowindices));
5536   for (i = 0; i < nrows; i++) {
5537     /* Find a remote index and an owner for a row
5538      * The row could be local or remote
5539      * */
5540     owner = 0;
5541     lidx  = 0;
5542     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5543     iremote[i].index = lidx;
5544     iremote[i].rank  = owner;
5545   }
5546   /* Create SF to communicate how many nonzero columns for each row */
5547   PetscCall(PetscSFCreate(comm, &sf));
5548   /* SF will figure out the number of nonzero columns for each row, and their
5549    * offsets
5550    * */
5551   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5552   PetscCall(PetscSFSetFromOptions(sf));
5553   PetscCall(PetscSFSetUp(sf));
5554 
5555   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5556   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5557   PetscCall(PetscCalloc1(nrows, &pnnz));
5558   roffsets[0] = 0;
5559   roffsets[1] = 0;
5560   for (i = 0; i < plocalsize; i++) {
5561     /* diag */
5562     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5563     /* off diag */
5564     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5565     /* compute offsets so that we know the relative location of each row */
5566     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5567     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5568   }
5569   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5570   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5571   /* 'r' means root, and 'l' means leaf */
5572   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5573   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5574   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5575   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5576   PetscCall(PetscSFDestroy(&sf));
5577   PetscCall(PetscFree(roffsets));
5578   PetscCall(PetscFree(nrcols));
5579   dntotalcols = 0;
5580   ontotalcols = 0;
5581   ncol        = 0;
5582   for (i = 0; i < nrows; i++) {
5583     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5584     ncol    = PetscMax(pnnz[i], ncol);
5585     /* diag */
5586     dntotalcols += nlcols[i * 2 + 0];
5587     /* off diag */
5588     ontotalcols += nlcols[i * 2 + 1];
5589   }
5590   /* We do not need to figure out the exact number of columns
5591    * since all the calculations will be done by going through the raw data
5592    * */
5593   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5594   PetscCall(MatSetUp(*P_oth));
5595   PetscCall(PetscFree(pnnz));
5596   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5597   /* diag */
5598   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5599   /* off diag */
5600   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5601   /* diag */
5602   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5603   /* off diag */
5604   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5605   dntotalcols = 0;
5606   ontotalcols = 0;
5607   ntotalcols  = 0;
5608   for (i = 0; i < nrows; i++) {
5609     owner = 0;
5610     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5611     /* Set iremote for diag matrix */
5612     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5613       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5614       iremote[dntotalcols].rank  = owner;
5615       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5616       ilocal[dntotalcols++] = ntotalcols++;
5617     }
5618     /* off diag */
5619     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5620       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5621       oiremote[ontotalcols].rank  = owner;
5622       oilocal[ontotalcols++]      = ntotalcols++;
5623     }
5624   }
5625   PetscCall(ISRestoreIndices(rows, &lrowindices));
5626   PetscCall(PetscFree(loffsets));
5627   PetscCall(PetscFree(nlcols));
5628   PetscCall(PetscSFCreate(comm, &sf));
5629   /* P serves as roots and P_oth serves as leaves
5630    * Diag matrix
5631    * */
5632   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5633   PetscCall(PetscSFSetFromOptions(sf));
5634   PetscCall(PetscSFSetUp(sf));
5635 
5636   PetscCall(PetscSFCreate(comm, &osf));
5637   /* Off diag */
5638   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5639   PetscCall(PetscSFSetFromOptions(osf));
5640   PetscCall(PetscSFSetUp(osf));
5641   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5642   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5643   /* We operate on the matrix internal data to save memory */
5644   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5645   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5647   /* Convert to global indices for diag matrix */
5648   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5649   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5650   /* We want P_oth to store global indices */
5651   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5652   /* Use memory scalable approach */
5653   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5654   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5655   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5656   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5657   /* Convert back to local indices */
5658   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5659   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5660   nout = 0;
5661   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5662   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5663   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5664   /* Exchange values */
5665   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5666   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5667   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5668   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5669   /* Stop PETSc from shrinking memory */
5670   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5671   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5672   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5673   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5674   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5675   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5676   PetscCall(PetscSFDestroy(&sf));
5677   PetscCall(PetscSFDestroy(&osf));
5678   PetscFunctionReturn(0);
5679 }
5680 
5681 /*
5682  * Creates a SeqAIJ matrix by taking the rows of P (the second matrix) that correspond to the nonzero columns of local A
5683  * This supports MPIAIJ and MAIJ
5684  * */
5685 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5686 {
5687   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5688   Mat_SeqAIJ *p_oth;
5689   IS          rows, map;
5690   PetscHMapI  hamp;
5691   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5692   MPI_Comm    comm;
5693   PetscSF     sf, osf;
5694   PetscBool   has;
5695 
5696   PetscFunctionBegin;
5697   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5698   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5699   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5700    *  and then create a submatrix (that often is an overlapping matrix)
5701    * */
5702   if (reuse == MAT_INITIAL_MATRIX) {
5703     /* Use a hash table to figure out unique keys */
5704     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5705     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5706     count = 0;
5707     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5708     for (i = 0; i < a->B->cmap->n; i++) {
5709       key = a->garray[i] / dof;
5710       PetscCall(PetscHMapIHas(hamp, key, &has));
5711       if (!has) {
5712         mapping[i] = count;
5713         PetscCall(PetscHMapISet(hamp, key, count++));
5714       } else {
5715         /* Current 'i' has the same value as in the previous step */
5716         mapping[i] = count - 1;
5717       }
5718     }
5719     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5720     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5721     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5722     PetscCall(PetscCalloc1(htsize, &rowindices));
5723     off = 0;
5724     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5725     PetscCall(PetscHMapIDestroy(&hamp));
5726     PetscCall(PetscSortInt(htsize, rowindices));
5727     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5728     /* In case the matrix was already created but the user wants to recreate it */
5729     PetscCall(MatDestroy(P_oth));
5730     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5731     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5732     PetscCall(ISDestroy(&map));
5733     PetscCall(ISDestroy(&rows));
5734   } else if (reuse == MAT_REUSE_MATRIX) {
5735     /* If the matrix was already created, we simply update the values using the SF objects
5736      * that were attached to the matrix earlier.
5737      */
5738     const PetscScalar *pd_a, *po_a;
5739 
5740     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5741     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5742     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5743     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5744     /* Update values in place */
5745     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5746     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5747     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5748     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5749     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5750     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5751     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5752     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5753   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5754   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5755   PetscFunctionReturn(0);
5756 }
5757 
5758 /*@C
5759   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of B corresponding to the nonzero columns of local A
5760 
5761   Collective
5762 
5763   Input Parameters:
5764 + A - the first matrix in `MATMPIAIJ` format
5765 . B - the second matrix in `MATMPIAIJ` format
5766 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5767 
5768   Output Parameters:
5769 + rowb - on input, the index set of rows of B to extract (or NULL); modified on output
5770 . colb - on input, the index set of columns of B to extract (or NULL); modified on output
5771 - B_seq - the sequential matrix generated
5772 
5773   Level: developer
5774 
5775 @*/
5776 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5777 {
5778   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5779   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5780   IS          isrowb, iscolb;
5781   Mat        *bseq = NULL;
5782 
5783   PetscFunctionBegin;
5784   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5785     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5786   }
5787   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5788 
5789   if (scall == MAT_INITIAL_MATRIX) {
5790     start = A->cmap->rstart;
5791     cmap  = a->garray;
5792     nzA   = a->A->cmap->n;
5793     nzB   = a->B->cmap->n;
5794     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5795     ncols = 0;
5796     for (i = 0; i < nzB; i++) { /* row < local row index */
5797       if (cmap[i] < start) idx[ncols++] = cmap[i];
5798       else break;
5799     }
5800     imark = i;
5801     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5802     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5803     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5804     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5805   } else {
5806     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5807     isrowb = *rowb;
5808     iscolb = *colb;
5809     PetscCall(PetscMalloc1(1, &bseq));
5810     bseq[0] = *B_seq;
5811   }
5812   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5813   *B_seq = bseq[0];
5814   PetscCall(PetscFree(bseq));
5815   if (!rowb) {
5816     PetscCall(ISDestroy(&isrowb));
5817   } else {
5818     *rowb = isrowb;
5819   }
5820   if (!colb) {
5821     PetscCall(ISDestroy(&iscolb));
5822   } else {
5823     *colb = iscolb;
5824   }
5825   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5826   PetscFunctionReturn(0);
5827 }
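
/*
   A minimal usage sketch (illustrative only): A and B are MATMPIAIJ matrices with compatible
   layouts (A's column layout matches B's row layout). Passing NULL index sets lets the routine
   create and then discard them; the caller destroys B_seq.

     Mat B_seq;
     PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, NULL, NULL, &B_seq));
     ... use B_seq ...
     PetscCall(MatDestroy(&B_seq));
*/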
5828 
5829 /*
5830     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5831     of the OFF-DIAGONAL portion of local A
5832 
5833     Collective
5834 
5835    Input Parameters:
5836 +    A,B - the matrices in mpiaij format
5837 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5838 
5839    Output Parameters:
5840 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5841 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5842 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5843 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5844 
5845     Developer Note:
5846     This directly accesses information inside the VecScatter associated with the matrix-vector product
5847      for this matrix. This is not desirable.
5848 
5849     Level: developer
5850 
5851 */
5852 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5853 {
5854   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5855   Mat_SeqAIJ        *b_oth;
5856   VecScatter         ctx;
5857   MPI_Comm           comm;
5858   const PetscMPIInt *rprocs, *sprocs;
5859   const PetscInt    *srow, *rstarts, *sstarts;
5860   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5861   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5862   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5863   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5864   PetscMPIInt        size, tag, rank, nreqs;
5865 
5866   PetscFunctionBegin;
5867   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5868   PetscCallMPI(MPI_Comm_size(comm, &size));
5869 
5870   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5871     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5872   }
5873   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5874   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5875 
5876   if (size == 1) {
5877     startsj_s = NULL;
5878     bufa_ptr  = NULL;
5879     *B_oth    = NULL;
5880     PetscFunctionReturn(0);
5881   }
5882 
5883   ctx = a->Mvctx;
5884   tag = ((PetscObject)ctx)->tag;
5885 
5886   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5887   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5888   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5889   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5890   PetscCall(PetscMalloc1(nreqs, &reqs));
5891   rwaits = reqs;
5892   swaits = reqs + nrecvs;
5893 
5894   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5895   if (scall == MAT_INITIAL_MATRIX) {
5896     /* i-array */
5897     /*---------*/
5898     /*  post receives */
5899     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5900     for (i = 0; i < nrecvs; i++) {
5901       rowlen = rvalues + rstarts[i] * rbs;
5902       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5903       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5904     }
5905 
5906     /* pack the outgoing message */
5907     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5908 
5909     sstartsj[0] = 0;
5910     rstartsj[0] = 0;
5911     len         = 0; /* total length of j or a array to be sent */
5912     if (nsends) {
5913       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5914       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5915     }
5916     for (i = 0; i < nsends; i++) {
5917       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5918       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5919       for (j = 0; j < nrows; j++) {
5920         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5921         for (l = 0; l < sbs; l++) {
5922           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5923 
5924           rowlen[j * sbs + l] = ncols;
5925 
5926           len += ncols;
5927           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5928         }
5929         k++;
5930       }
5931       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5932 
5933       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5934     }
5935     /* recvs and sends of i-array are completed */
5936     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5937     PetscCall(PetscFree(svalues));
5938 
5939     /* allocate buffers for sending j and a arrays */
5940     PetscCall(PetscMalloc1(len + 1, &bufj));
5941     PetscCall(PetscMalloc1(len + 1, &bufa));
5942 
5943     /* create i-array of B_oth */
5944     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5945 
5946     b_othi[0] = 0;
5947     len       = 0; /* total length of j or a array to be received */
5948     k         = 0;
5949     for (i = 0; i < nrecvs; i++) {
5950       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5951       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5952       for (j = 0; j < nrows; j++) {
5953         b_othi[k + 1] = b_othi[k] + rowlen[j];
5954         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5955         k++;
5956       }
5957       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5958     }
5959     PetscCall(PetscFree(rvalues));
5960 
5961     /* allocate space for j and a arrays of B_oth */
5962     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5963     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5964 
5965     /* j-array */
5966     /*---------*/
5967     /*  post receives of j-array */
5968     for (i = 0; i < nrecvs; i++) {
5969       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5970       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5971     }
5972 
5973     /* pack the outgoing message j-array */
5974     if (nsends) k = sstarts[0];
5975     for (i = 0; i < nsends; i++) {
5976       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5977       bufJ  = bufj + sstartsj[i];
5978       for (j = 0; j < nrows; j++) {
5979         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5980         for (ll = 0; ll < sbs; ll++) {
5981           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5982           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5983           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5984         }
5985       }
5986       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5987     }
5988 
5989     /* recvs and sends of j-array are completed */
5990     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5991   } else if (scall == MAT_REUSE_MATRIX) {
5992     sstartsj = *startsj_s;
5993     rstartsj = *startsj_r;
5994     bufa     = *bufa_ptr;
5995     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5996     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5997   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5998 
5999   /* a-array */
6000   /*---------*/
6001   /*  post receives of a-array */
6002   for (i = 0; i < nrecvs; i++) {
6003     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6004     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6005   }
6006 
6007   /* pack the outgoing message a-array */
6008   if (nsends) k = sstarts[0];
6009   for (i = 0; i < nsends; i++) {
6010     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6011     bufA  = bufa + sstartsj[i];
6012     for (j = 0; j < nrows; j++) {
6013       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6014       for (ll = 0; ll < sbs; ll++) {
6015         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6016         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6017         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6018       }
6019     }
6020     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6021   }
6022   /* recvs and sends of a-array are completed */
6023   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6024   PetscCall(PetscFree(reqs));
6025 
6026   if (scall == MAT_INITIAL_MATRIX) {
6027     /* put together the new matrix */
6028     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6029 
6030     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6031     /* Since these are PETSc arrays, change flags to free them as necessary. */
6032     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6033     b_oth->free_a  = PETSC_TRUE;
6034     b_oth->free_ij = PETSC_TRUE;
6035     b_oth->nonew   = 0;
6036 
6037     PetscCall(PetscFree(bufj));
6038     if (!startsj_s || !bufa_ptr) {
6039       PetscCall(PetscFree2(sstartsj, rstartsj));
6040       PetscCall(PetscFree(bufa_ptr));
6041     } else {
6042       *startsj_s = sstartsj;
6043       *startsj_r = rstartsj;
6044       *bufa_ptr  = bufa;
6045     }
6046   } else if (scall == MAT_REUSE_MATRIX) {
6047     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6048   }
6049 
6050   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6051   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6052   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6053   PetscFunctionReturn(0);
6054 }
6055 
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6059 #if defined(PETSC_HAVE_MKL_SPARSE)
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6061 #endif
6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6064 #if defined(PETSC_HAVE_ELEMENTAL)
6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6066 #endif
6067 #if defined(PETSC_HAVE_SCALAPACK)
6068 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6069 #endif
6070 #if defined(PETSC_HAVE_HYPRE)
6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6072 #endif
6073 #if defined(PETSC_HAVE_CUDA)
6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6075 #endif
6076 #if defined(PETSC_HAVE_HIP)
6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6078 #endif
6079 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6081 #endif
6082 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6083 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6084 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6085 
6086 /*
6087     Computes (B'*A')' since computing B*A directly is untenable
6088 
6089                n                       p                          p
6090         [             ]       [             ]         [                 ]
6091       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6092         [             ]       [             ]         [                 ]
6093 
6094 */
6095 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6096 {
6097   Mat At, Bt, Ct;
6098 
6099   PetscFunctionBegin;
6100   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6101   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6102   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6103   PetscCall(MatDestroy(&At));
6104   PetscCall(MatDestroy(&Bt));
6105   PetscCall(MatTransposeSetPrecursor(Ct, C));
6106   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6107   PetscCall(MatDestroy(&Ct));
6108   PetscFunctionReturn(0);
6109 }
6110 
6111 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6112 {
6113   PetscBool cisdense;
6114 
6115   PetscFunctionBegin;
6116   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6117   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6118   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6119   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6120   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6121   PetscCall(MatSetUp(C));
6122 
6123   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6124   PetscFunctionReturn(0);
6125 }
6126 
6127 /* ----------------------------------------------------------------*/
6128 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6129 {
6130   Mat_Product *product = C->product;
6131   Mat          A = product->A, B = product->B;
6132 
6133   PetscFunctionBegin;
6134   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6135     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6136 
6137   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6138   C->ops->productsymbolic = MatProductSymbolic_AB;
6139   PetscFunctionReturn(0);
6140 }
6141 
6142 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6143 {
6144   Mat_Product *product = C->product;
6145 
6146   PetscFunctionBegin;
6147   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6148   PetscFunctionReturn(0);
6149 }
6150 
6151 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6152 
6153   Input Parameters:
6154 
6155     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6156     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6157 
6158     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6159 
6160     For Set1, j1[] contains column indices of the nonzeros.
6161     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6162     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6163     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6164 
6165     Similar for Set2.
6166 
6167     This routine merges the two sets of nonzeros row by row and removes repeats.
6168 
6169   Output Parameters: (memory is allocated by the caller)
6170 
6171     i[],j[]: the CSR of the merged matrix, which has m rows.
6172     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6173     imap2[]: similar to imap1[], but for Set2.
6174     Note we order nonzeros row-by-row and from left to right.
6175     Note we order nonzeros row-by-row and from left to right. */
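/*
   A small worked example (hypothetical data) of the merging performed by MatMergeEntries_Internal(),
   for a matrix with a single local row (m = 1):

     Set1: j1 = {1,1,4}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}   (column 1 twice, column 4 once)
     Set2: j2 = {2,4,4}, rowBegin2 = {0}, rowEnd2 = {3}, jmap2 = {0,1,3}   (column 2 once, column 4 twice)

   The routine would produce

     i = {0,3}, j = {1,2,4}, imap1 = {0,2}, imap2 = {1,2}
*/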
6176 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6177 {
6178   PetscInt   r, m; /* Row index of mat */
6179   PetscCount t, t1, t2, b1, e1, b2, e2;
6180 
6181   PetscFunctionBegin;
6182   PetscCall(MatGetLocalSize(mat, &m, NULL));
6183   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6184   i[0]        = 0;
6185   for (r = 0; r < m; r++) { /* Do row by row merging */
6186     b1 = rowBegin1[r];
6187     e1 = rowEnd1[r];
6188     b2 = rowBegin2[r];
6189     e2 = rowEnd2[r];
6190     while (b1 < e1 && b2 < e2) {
6191       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6192         j[t]      = j1[b1];
6193         imap1[t1] = t;
6194         imap2[t2] = t;
6195         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique nonzero of Set1 */
6196         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique nonzero of Set2 */
6197         t1++;
6198         t2++;
6199         t++;
6200       } else if (j1[b1] < j2[b2]) {
6201         j[t]      = j1[b1];
6202         imap1[t1] = t;
6203         b1 += jmap1[t1 + 1] - jmap1[t1];
6204         t1++;
6205         t++;
6206       } else {
6207         j[t]      = j2[b2];
6208         imap2[t2] = t;
6209         b2 += jmap2[t2 + 1] - jmap2[t2];
6210         t2++;
6211         t++;
6212       }
6213     }
6214     /* Merge the remaining in either j1[] or j2[] */
6215     while (b1 < e1) {
6216       j[t]      = j1[b1];
6217       imap1[t1] = t;
6218       b1 += jmap1[t1 + 1] - jmap1[t1];
6219       t1++;
6220       t++;
6221     }
6222     while (b2 < e2) {
6223       j[t]      = j2[b2];
6224       imap2[t2] = t;
6225       b2 += jmap2[t2 + 1] - jmap2[t2];
6226       t2++;
6227       t++;
6228     }
6229     i[r + 1] = t;
6230   }
6231   PetscFunctionReturn(0);
6232 }
6233 
6234 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6235 
6236   Input Parameters:
6237     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6238     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6239       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6240 
6241       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6242       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6243 
6244   Output Parameters:
6245     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6246     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6247       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6248       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6249 
6250     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6251       Atot: number of entries belonging to the diagonal block.
6252       Annz: number of unique nonzeros belonging to the diagonal block.
6253       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6254         repeats (i.e., same 'i,j' pair).
6255       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6256         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6257 
6261     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6262 
6263     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6264 */
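/*
   A small worked example (hypothetical data) of the splitting performed by MatSplitEntries_Internal().
   Suppose the calling process owns row 0 (m = 1) and columns [0,3) (so cstart = 0, cend = 3), and the
   n = 5 input entries are

     i = {0,0,0,0,0}, j = {4,1,1,5,2}, perm = {0,1,2,3,4}

   After the call, j is sorted within the row as {1,1,2,4,5}, with rowBegin = {0}, rowMid = {3},
   rowEnd = {5}; the diagonal block gets Atot = 3, Annz = 2, Ajmap = {0,2,3} and Aperm = {1,2,4}
   (the order of the two repeats of column 1 may vary), while the off-diagonal block gets Btot = 2,
   Bnnz = 2, Bjmap = {0,1,2} and Bperm = {0,3}.
*/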
6265 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6266 {
6267   PetscInt    cstart, cend, rstart, rend, row, col;
6268   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6269   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6270   PetscCount  k, m, p, q, r, s, mid;
6271   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6272 
6273   PetscFunctionBegin;
6274   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6275   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6276   m = rend - rstart;
6277 
6278   for (k = 0; k < n; k++) {
6279     if (i[k] >= 0) break;
6280   } /* Skip negative rows */
6281 
6282   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6283      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6284   */
6285   while (k < n) {
6286     row = i[k];
6287     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6288     for (s = k; s < n; s++)
6289       if (i[s] != row) break;
6290     for (p = k; p < s; p++) {
6291       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6292       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6293     }
6294     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6295     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6296     rowBegin[row - rstart] = k;
6297     rowMid[row - rstart]   = mid;
6298     rowEnd[row - rstart]   = s;
6299 
6300     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6301     Atot += mid - k;
6302     Btot += s - mid;
6303 
6304     /* Count unique nonzeros of this diag/offdiag row */
6305     for (p = k; p < mid;) {
6306       col = j[p];
6307       do {
6308         j[p] += PETSC_MAX_INT;
6309         p++;
6310       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6311       Annz++;
6312     }
6313 
6314     for (p = mid; p < s;) {
6315       col = j[p];
6316       do {
6317         p++;
6318       } while (p < s && j[p] == col);
6319       Bnnz++;
6320     }
6321     k = s;
6322   }
6323 
6324   /* Allocation according to Atot, Btot, Annz, Bnnz */
6325   PetscCall(PetscMalloc1(Atot, &Aperm));
6326   PetscCall(PetscMalloc1(Btot, &Bperm));
6327   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6328   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6329 
6330   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6331   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6332   for (r = 0; r < m; r++) {
6333     k   = rowBegin[r];
6334     mid = rowMid[r];
6335     s   = rowEnd[r];
6336     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6337     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6338     Atot += mid - k;
6339     Btot += s - mid;
6340 
6341     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6342     for (p = k; p < mid;) {
6343       col = j[p];
6344       q   = p;
6345       do {
6346         p++;
6347       } while (p < mid && j[p] == col);
6348       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6349       Annz++;
6350     }
6351 
6352     for (p = mid; p < s;) {
6353       col = j[p];
6354       q   = p;
6355       do {
6356         p++;
6357       } while (p < s && j[p] == col);
6358       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6359       Bnnz++;
6360     }
6361   }
6362   /* Output */
6363   *Aperm_ = Aperm;
6364   *Annz_  = Annz;
6365   *Atot_  = Atot;
6366   *Ajmap_ = Ajmap;
6367   *Bperm_ = Bperm;
6368   *Bnnz_  = Bnnz;
6369   *Btot_  = Btot;
6370   *Bjmap_ = Bjmap;
6371   PetscFunctionReturn(0);
6372 }
6373 
6374 /* Expand the jmap[] array to make a new one based on the nonzeros in the merged matrix
6375 
6376   Input Parameters:
6377     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6378     nnz:  number of unique nonzeros in the merged matrix
6379     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6380     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6381 
6382   Output Parameter: (memory is allocated by the caller)
6383     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6384 
6385   Example:
6386     nnz1 = 4
6387     nnz  = 6
6388     imap = [1,3,4,5]
6389     jmap = [0,3,5,6,7]
6390    then,
6391     jmap_new = [0,0,3,3,5,6,7]
6392 */
6393 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6394 {
6395   PetscCount k, p;
6396 
6397   PetscFunctionBegin;
6398   jmap_new[0] = 0;
6399   p           = nnz;                /* p loops over jmap_new[] backwards */
6400   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6401     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6402   }
6403   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6404   PetscFunctionReturn(0);
6405 }
6406 
6407 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6408 {
6409   MPI_Comm    comm;
6410   PetscMPIInt rank, size;
6411   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6412   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6413   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6414 
6415   PetscFunctionBegin;
6416   PetscCall(PetscFree(mpiaij->garray));
6417   PetscCall(VecDestroy(&mpiaij->lvec));
6418 #if defined(PETSC_USE_CTABLE)
6419   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6420 #else
6421   PetscCall(PetscFree(mpiaij->colmap));
6422 #endif
6423   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6424   mat->assembled     = PETSC_FALSE;
6425   mat->was_assembled = PETSC_FALSE;
6426   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6427 
6428   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6429   PetscCallMPI(MPI_Comm_size(comm, &size));
6430   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6431   PetscCall(PetscLayoutSetUp(mat->rmap));
6432   PetscCall(PetscLayoutSetUp(mat->cmap));
6433   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6434   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6435   PetscCall(MatGetLocalSize(mat, &m, &n));
6436   PetscCall(MatGetSize(mat, &M, &N));
6437 
6438   /* ---------------------------------------------------------------------------*/
6439   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6440   /* entries come first, then local rows, then remote rows.                     */
6441   /* ---------------------------------------------------------------------------*/
6442   PetscCount n1 = coo_n, *perm1;
6443   PetscInt  *i1 = coo_i, *j1 = coo_j;
6444 
6445   PetscCall(PetscMalloc1(n1, &perm1));
6446   for (k = 0; k < n1; k++) perm1[k] = k;
6447 
6448   /* Manipulate indices so that entries with negative row or col indices will have the smallest
6449      row indices, local entries will have larger but still negative row indices, and remote entries
6450      will have positive row indices.
6451   */
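  /* For illustration only (hypothetical values): with rstart = 4 and rend = 8, an entry with i1[k] = -1
     becomes PETSC_MIN_INT, a local row i1[k] = 5 becomes 5 - PETSC_MAX_INT (negative, but greater than
     PETSC_MIN_INT), and a remote row i1[k] = 10 stays positive, so sorting by i1[] orders the entries as
     ignored, then local, then remote. */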
6452   for (k = 0; k < n1; k++) {
6453     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6454     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6455     else {
6456       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6457       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6458     }
6459   }
6460 
6461   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6462   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6463   for (k = 0; k < n1; k++) {
6464     if (i1[k] > PETSC_MIN_INT) break;
6465   }                                                                               /* Advance k to the first entry we need to take care of */
6466   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6467   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows */
6468 
6469   /* ---------------------------------------------------------------------------*/
6470   /*           Split local rows into diag/offdiag portions                      */
6471   /* ---------------------------------------------------------------------------*/
6472   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6473   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6474   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6475 
6476   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6477   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6478   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6479 
6480   /* ---------------------------------------------------------------------------*/
6481   /*           Send remote rows to their owner                                  */
6482   /* ---------------------------------------------------------------------------*/
6483   /* Find which rows should be sent to which remote ranks */
6484   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6485   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6486   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6487   const PetscInt *ranges;
6488   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6489 
6490   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6491   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6492   for (k = rem; k < n1;) {
6493     PetscMPIInt owner;
6494     PetscInt    firstRow, lastRow;
6495 
6496     /* Locate a row range */
6497     firstRow = i1[k]; /* first row of this owner */
6498     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6499     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6500 
6501     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6502     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6503 
6504     /* All entries in [k,p) belong to this remote owner */
6505     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6506       PetscMPIInt *sendto2;
6507       PetscInt    *nentries2;
6508       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6509 
6510       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6511       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6512       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); /* Copy the old entries, not past the end of the old array */
6513       PetscCall(PetscFree2(sendto, nentries));                 /* Free the old pair that was allocated together */
6514       sendto   = sendto2;
6515       nentries = nentries2;
6516       maxNsend = maxNsend2;
6517     }
6518     sendto[nsend]   = owner;
6519     nentries[nsend] = p - k;
6520     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6521     nsend++;
6522     k = p;
6523   }
6524 
6525   /* Build 1st SF to learn the offsets in remote buffers at which to place the data we send */
6526   PetscSF      sf1;
6527   PetscInt     nroots = 1, nroots2 = 0;
6528   PetscInt     nleaves = nsend, nleaves2 = 0;
6529   PetscInt    *offsets;
6530   PetscSFNode *iremote;
6531 
6532   PetscCall(PetscSFCreate(comm, &sf1));
6533   PetscCall(PetscMalloc1(nsend, &iremote));
6534   PetscCall(PetscMalloc1(nsend, &offsets));
6535   for (k = 0; k < nsend; k++) {
6536     iremote[k].rank  = sendto[k];
6537     iremote[k].index = 0;
6538     nleaves2 += nentries[k];
6539     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6540   }
6541   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6542   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6543   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the check of offsets[] below will catch it */
6544   PetscCall(PetscSFDestroy(&sf1));
6545   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6546 
6547   /* Build 2nd SF to send remote COOs to their owner */
6548   PetscSF sf2;
6549   nroots  = nroots2;
6550   nleaves = nleaves2;
6551   PetscCall(PetscSFCreate(comm, &sf2));
6552   PetscCall(PetscSFSetFromOptions(sf2));
6553   PetscCall(PetscMalloc1(nleaves, &iremote));
6554   p = 0;
6555   for (k = 0; k < nsend; k++) {
6556     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6557     for (q = 0; q < nentries[k]; q++, p++) {
6558       iremote[p].rank  = sendto[k];
6559       iremote[p].index = offsets[k] + q;
6560     }
6561   }
6562   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6563 
6564   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6565   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6566 
6567   /* Send the remote COOs to their owner */
6568   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6569   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6570   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6571   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6572   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6573   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6574   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6575 
6576   PetscCall(PetscFree(offsets));
6577   PetscCall(PetscFree2(sendto, nentries));
6578 
6579   /* ---------------------------------------------------------------*/
6580   /* Sort received COOs by row along with the permutation array     */
6581   /* ---------------------------------------------------------------*/
6582   for (k = 0; k < n2; k++) perm2[k] = k;
6583   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6584 
6585   /* ---------------------------------------------------------------*/
6586   /* Split received COOs into diag/offdiag portions                 */
6587   /* ---------------------------------------------------------------*/
6588   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6589   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6590   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6591 
6592   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6593   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6594 
6595   /* --------------------------------------------------------------------------*/
6596   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6597   /* --------------------------------------------------------------------------*/
6598   PetscInt *Ai, *Bi;
6599   PetscInt *Aj, *Bj;
6600 
6601   PetscCall(PetscMalloc1(m + 1, &Ai));
6602   PetscCall(PetscMalloc1(m + 1, &Bi));
6603   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6604   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6605 
6606   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6607   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6608   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6609   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6610   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6611 
6612   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6613   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6614 
6615   /* --------------------------------------------------------------------------*/
6616   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6617   /* expect that most nonzeros in A/B have local contributing entries          */
6618   /* --------------------------------------------------------------------------*/
6619   PetscInt    Annz = Ai[m];
6620   PetscInt    Bnnz = Bi[m];
6621   PetscCount *Ajmap1_new, *Bjmap1_new;
6622 
6623   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6624   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6625 
6626   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6627   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6628 
6629   PetscCall(PetscFree(Aimap1));
6630   PetscCall(PetscFree(Ajmap1));
6631   PetscCall(PetscFree(Bimap1));
6632   PetscCall(PetscFree(Bjmap1));
6633   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6634   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6635   PetscCall(PetscFree(perm1));
6636   PetscCall(PetscFree3(i2, j2, perm2));
6637 
6638   Ajmap1 = Ajmap1_new;
6639   Bjmap1 = Bjmap1_new;
6640 
6641   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6642   if (Annz < Annz1 + Annz2) {
6643     PetscInt *Aj_new;
6644     PetscCall(PetscMalloc1(Annz, &Aj_new));
6645     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6646     PetscCall(PetscFree(Aj));
6647     Aj = Aj_new;
6648   }
6649 
6650   if (Bnnz < Bnnz1 + Bnnz2) {
6651     PetscInt *Bj_new;
6652     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6653     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6654     PetscCall(PetscFree(Bj));
6655     Bj = Bj_new;
6656   }
6657 
6658   /* --------------------------------------------------------------------------------*/
6659   /* Create new submatrices for on-process and off-process coupling                  */
6660   /* --------------------------------------------------------------------------------*/
6661   PetscScalar *Aa, *Ba;
6662   MatType      rtype;
6663   Mat_SeqAIJ  *a, *b;
6664   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6665   PetscCall(PetscCalloc1(Bnnz, &Ba));
6666   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6667   if (cstart) {
6668     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6669   }
6670   PetscCall(MatDestroy(&mpiaij->A));
6671   PetscCall(MatDestroy(&mpiaij->B));
6672   PetscCall(MatGetRootType_Private(mat, &rtype));
6673   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6674   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6675   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6676 
6677   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6678   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6679   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6680   a->free_a = b->free_a = PETSC_TRUE;
6681   a->free_ij = b->free_ij = PETSC_TRUE;
6682 
6683   /* conversion must happen AFTER multiply setup */
6684   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6685   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6686   PetscCall(VecDestroy(&mpiaij->lvec));
6687   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6688 
6689   mpiaij->coo_n   = coo_n;
6690   mpiaij->coo_sf  = sf2;
6691   mpiaij->sendlen = nleaves;
6692   mpiaij->recvlen = nroots;
6693 
6694   mpiaij->Annz = Annz;
6695   mpiaij->Bnnz = Bnnz;
6696 
6697   mpiaij->Annz2 = Annz2;
6698   mpiaij->Bnnz2 = Bnnz2;
6699 
6700   mpiaij->Atot1 = Atot1;
6701   mpiaij->Atot2 = Atot2;
6702   mpiaij->Btot1 = Btot1;
6703   mpiaij->Btot2 = Btot2;
6704 
6705   mpiaij->Ajmap1 = Ajmap1;
6706   mpiaij->Aperm1 = Aperm1;
6707 
6708   mpiaij->Bjmap1 = Bjmap1;
6709   mpiaij->Bperm1 = Bperm1;
6710 
6711   mpiaij->Aimap2 = Aimap2;
6712   mpiaij->Ajmap2 = Ajmap2;
6713   mpiaij->Aperm2 = Aperm2;
6714 
6715   mpiaij->Bimap2 = Bimap2;
6716   mpiaij->Bjmap2 = Bjmap2;
6717   mpiaij->Bperm2 = Bperm2;
6718 
6719   mpiaij->Cperm1 = Cperm1;
6720 
6721   /* Allocate send/recv buffers during preallocation. If not used, they cost nothing on the host */
6722   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6723   PetscFunctionReturn(0);
6724 }
6725 
6726 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6727 {
6728   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6729   Mat               A = mpiaij->A, B = mpiaij->B;
6730   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6731   PetscScalar      *Aa, *Ba;
6732   PetscScalar      *sendbuf = mpiaij->sendbuf;
6733   PetscScalar      *recvbuf = mpiaij->recvbuf;
6734   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6735   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6736   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6737   const PetscCount *Cperm1 = mpiaij->Cperm1;
6738 
6739   PetscFunctionBegin;
6740   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6741   PetscCall(MatSeqAIJGetArray(B, &Ba));
6742 
6743   /* Pack entries to be sent to remote */
6744   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6745 
6746   /* Send remote entries to their owner and overlap the communication with local computation */
6747   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6748   /* Add local entries to A and B */
6749   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6750     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6751     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6752     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6753   }
6754   for (PetscCount i = 0; i < Bnnz; i++) {
6755     PetscScalar sum = 0.0;
6756     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6757     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6758   }
6759   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6760 
6761   /* Add received remote entries to A and B */
6762   for (PetscCount i = 0; i < Annz2; i++) {
6763     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6764   }
6765   for (PetscCount i = 0; i < Bnnz2; i++) {
6766     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6767   }
6768   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6769   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6770   PetscFunctionReturn(0);
6771 }
6772 
6773 /* ----------------------------------------------------------------*/
6774 
6775 /*MC
6776    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6777 
6778    Options Database Keys:
6779 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6780 
6781    Level: beginner
6782 
6783    Notes:
6784     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values;
6785     in this case the values associated with the rows and columns one passes in are set to zero
6786     in the matrix
6787 
6788     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6789     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
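
    Example of typical usage (a minimal sketch; comm, M and N are assumed to be an MPI communicator and global sizes; error checking and value insertion are omitted):
.vb
    Mat A;
    MatCreate(comm, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
    MatSetType(A, MATMPIAIJ);
    MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);
    /* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve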
6790 
6791 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6792 M*/
6793 
6794 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6795 {
6796   Mat_MPIAIJ *b;
6797   PetscMPIInt size;
6798 
6799   PetscFunctionBegin;
6800   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6801 
6802   PetscCall(PetscNew(&b));
6803   B->data = (void *)b;
6804   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
6805   B->assembled  = PETSC_FALSE;
6806   B->insertmode = NOT_SET_VALUES;
6807   b->size       = size;
6808 
6809   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6810 
6811   /* build cache for off array entries formed */
6812   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6813 
6814   b->donotstash  = PETSC_FALSE;
6815   b->colmap      = NULL;
6816   b->garray      = NULL;
6817   b->roworiented = PETSC_TRUE;
6818 
6819   /* stuff used for matrix vector multiply */
6820   b->lvec  = NULL;
6821   b->Mvctx = NULL;
6822 
6823   /* stuff for MatGetRow() */
6824   b->rowindices   = NULL;
6825   b->rowvalues    = NULL;
6826   b->getrowactive = PETSC_FALSE;
6827 
6828   /* flexible pointer used in CUSPARSE classes */
6829   b->spptr = NULL;
6830 
6831   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6832   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6833   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6834   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6835   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6837   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6838   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6841 #if defined(PETSC_HAVE_CUDA)
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6843 #endif
6844 #if defined(PETSC_HAVE_HIP)
6845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6846 #endif
6847 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6848   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6849 #endif
6850 #if defined(PETSC_HAVE_MKL_SPARSE)
6851   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6852 #endif
6853   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6854   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6855   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6856   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6857 #if defined(PETSC_HAVE_ELEMENTAL)
6858   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6859 #endif
6860 #if defined(PETSC_HAVE_SCALAPACK)
6861   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6862 #endif
6863   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6865 #if defined(PETSC_HAVE_HYPRE)
6866   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6867   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6868 #endif
6869   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6870   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6871   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6872   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6873   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6874   PetscFunctionReturn(0);
6875 }
6876 
6877 /*@C
6878      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6879          and "off-diagonal" part of the matrix in CSR format.
6880 
6881    Collective
6882 
6883    Input Parameters:
6884 +  comm - MPI communicator
6885 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6886 .  n - This value should be the same as the local size used in creating the
6887        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
6888        calculated if N is given). For square matrices n is almost always m.
6889 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6890 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6891 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6892 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6893 .   a - matrix values
6894 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6895 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6896 -   oa - matrix values
6897 
6898    Output Parameter:
6899 .   mat - the matrix
6900 
6901    Level: advanced
6902 
6903    Notes:
6904        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6905        must free the arrays once the matrix has been destroyed and not before.
6906 
6907        The i and j indices are 0 based
6908 
6909        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6910 
6911        This sets local rows and cannot be used to set off-processor values.
6912 
6913        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6914        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6915        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6916        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6917        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6918        communication if it is known that only local entries will be set.
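
       Example (a hypothetical layout, for illustration only): on the rank owning global rows 0-1 and global
       columns 0-1 of a 4x4 matrix with nonzeros A(0,0)=1, A(0,1)=2, A(1,1)=3 in the "diagonal" block and
       A(0,3)=4, A(1,2)=5 in the "off-diagonal" block, one would pass
.vb
       i  = {0, 2, 3};   j  = {0, 1, 1};   a  = {1., 2., 3.};   /* local column indices  */
       oi = {0, 1, 2};   oj = {3, 2};      oa = {4., 5.};       /* global column indices */
.ve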
6919 
6920 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6921           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6922 @*/
6923 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6924 {
6925   Mat_MPIAIJ *maij;
6926 
6927   PetscFunctionBegin;
6928   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6929   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6930   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6931   PetscCall(MatCreate(comm, mat));
6932   PetscCall(MatSetSizes(*mat, m, n, M, N));
6933   PetscCall(MatSetType(*mat, MATMPIAIJ));
6934   maij = (Mat_MPIAIJ *)(*mat)->data;
6935 
6936   (*mat)->preallocated = PETSC_TRUE;
6937 
6938   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6939   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6940 
6941   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6942   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6943 
6944   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6945   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6946   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6947   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6948   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6949   PetscFunctionReturn(0);
6950 }
6951 
6952 typedef struct {
6953   Mat       *mp;    /* intermediate products */
6954   PetscBool *mptmp; /* is the intermediate product temporary ? */
6955   PetscInt   cp;    /* number of intermediate products */
6956 
6957   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6958   PetscInt    *startsj_s, *startsj_r;
6959   PetscScalar *bufa;
6960   Mat          P_oth;
6961 
6962   /* may take advantage of merging product->B */
6963   Mat Bloc; /* B-local by merging diag and off-diag */
6964 
6965   /* cusparse does not support splitting the symbolic and numeric phases.
6966      When api_user is true, we don't need to update the numerical values
6967      of the temporary storage */
6968   PetscBool reusesym;
6969 
6970   /* support for COO values insertion */
6971   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6972   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6973   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6974   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6975   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6976   PetscMemType mtype;
6977 
6978   /* customization */
6979   PetscBool abmerge;
6980   PetscBool P_oth_bind;
6981 } MatMatMPIAIJBACKEND;
6982 
6983 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6984 {
6985   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6986   PetscInt             i;
6987 
6988   PetscFunctionBegin;
6989   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6990   PetscCall(PetscFree(mmdata->bufa));
6991   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6992   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6993   PetscCall(MatDestroy(&mmdata->P_oth));
6994   PetscCall(MatDestroy(&mmdata->Bloc));
6995   PetscCall(PetscSFDestroy(&mmdata->sf));
6996   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6997   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6998   PetscCall(PetscFree(mmdata->own[0]));
6999   PetscCall(PetscFree(mmdata->own));
7000   PetscCall(PetscFree(mmdata->off[0]));
7001   PetscCall(PetscFree(mmdata->off));
7002   PetscCall(PetscFree(mmdata));
7003   PetscFunctionReturn(0);
7004 }
7005 
7006 /* Copy selected n entries with indices in idx[] of A to v[].
7007    If idx is NULL, copy the whole data array of A to v[]
7008  */
7009 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7010 {
7011   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7012 
7013   PetscFunctionBegin;
7014   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7015   if (f) {
7016     PetscCall((*f)(A, n, idx, v));
7017   } else {
7018     const PetscScalar *vv;
7019 
7020     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7021     if (n && idx) {
7022       PetscScalar    *w  = v;
7023       const PetscInt *oi = idx;
7024       PetscInt        j;
7025 
7026       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7027     } else {
7028       PetscCall(PetscArraycpy(v, vv, n));
7029     }
7030     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7031   }
7032   PetscFunctionReturn(0);
7033 }
7034 
7035 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7036 {
7037   MatMatMPIAIJBACKEND *mmdata;
7038   PetscInt             i, n_d, n_o;
7039 
7040   PetscFunctionBegin;
7041   MatCheckProduct(C, 1);
7042   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7043   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7044   if (!mmdata->reusesym) { /* update temporary matrices */
7045     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7046     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7047   }
7048   mmdata->reusesym = PETSC_FALSE;
7049 
7050   for (i = 0; i < mmdata->cp; i++) {
7051     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7052     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7053   }
7054   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7055     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7056 
7057     if (mmdata->mptmp[i]) continue;
7058     if (noff) {
7059       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7060 
7061       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7062       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7063       n_o += noff;
7064       n_d += nown;
7065     } else {
7066       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7067 
7068       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7069       n_d += mm->nz;
7070     }
7071   }
7072   if (mmdata->hasoffproc) { /* offprocess insertion */
7073     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7074     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7075   }
7076   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7077   PetscFunctionReturn(0);
7078 }
7079 
7080 /* Support for Pt * A, A * P, or Pt * A * P */
7081 #define MAX_NUMBER_INTERMEDIATE 4
7082 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7083 {
7084   Mat_Product           *product = C->product;
7085   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7086   Mat_MPIAIJ            *a, *p;
7087   MatMatMPIAIJBACKEND   *mmdata;
7088   ISLocalToGlobalMapping P_oth_l2g = NULL;
7089   IS                     glob      = NULL;
7090   const char            *prefix;
7091   char                   pprefix[256];
7092   const PetscInt        *globidx, *P_oth_idx;
7093   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7094   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7095   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7096                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7097                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7098   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
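  /* For illustration (mirroring how the maps are consumed below): with rmapt[i] = 1, local row r of mp[i]
     is global row C->rmap->rstart + r of C; with rmapt[i] = 2, it is global row rmapa[i][r]. Columns are
     mapped analogously through cmapt[]/cmapa[], with cmapt[i] = 0 meaning the column indices are already
     global. */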
7099 
7100   MatProductType ptype;
7101   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7102   PetscMPIInt    size;
7103 
7104   PetscFunctionBegin;
7105   MatCheckProduct(C, 1);
7106   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7107   ptype = product->type;
7108   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7109     ptype                                          = MATPRODUCT_AB;
7110     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7111   }
7112   switch (ptype) {
7113   case MATPRODUCT_AB:
7114     A          = product->A;
7115     P          = product->B;
7116     m          = A->rmap->n;
7117     n          = P->cmap->n;
7118     M          = A->rmap->N;
7119     N          = P->cmap->N;
7120     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7121     break;
7122   case MATPRODUCT_AtB:
7123     P          = product->A;
7124     A          = product->B;
7125     m          = P->cmap->n;
7126     n          = A->cmap->n;
7127     M          = P->cmap->N;
7128     N          = A->cmap->N;
7129     hasoffproc = PETSC_TRUE;
7130     break;
7131   case MATPRODUCT_PtAP:
7132     A          = product->A;
7133     P          = product->B;
7134     m          = P->cmap->n;
7135     n          = P->cmap->n;
7136     M          = P->cmap->N;
7137     N          = P->cmap->N;
7138     hasoffproc = PETSC_TRUE;
7139     break;
7140   default:
7141     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7142   }
7143   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7144   if (size == 1) hasoffproc = PETSC_FALSE;
7145 
7146   /* defaults */
7147   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7148     mp[i]    = NULL;
7149     mptmp[i] = PETSC_FALSE;
7150     rmapt[i] = -1;
7151     cmapt[i] = -1;
7152     rmapa[i] = NULL;
7153     cmapa[i] = NULL;
7154   }
7155 
7156   /* customization */
7157   PetscCall(PetscNew(&mmdata));
7158   mmdata->reusesym = product->api_user;
7159   if (ptype == MATPRODUCT_AB) {
7160     if (product->api_user) {
7161       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7162       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7163       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7164       PetscOptionsEnd();
7165     } else {
7166       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7167       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7168       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7169       PetscOptionsEnd();
7170     }
7171   } else if (ptype == MATPRODUCT_PtAP) {
7172     if (product->api_user) {
7173       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7174       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7175       PetscOptionsEnd();
7176     } else {
7177       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7178       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7179       PetscOptionsEnd();
7180     }
7181   }
7182   a = (Mat_MPIAIJ *)A->data;
7183   p = (Mat_MPIAIJ *)P->data;
7184   PetscCall(MatSetSizes(C, m, n, M, N));
7185   PetscCall(PetscLayoutSetUp(C->rmap));
7186   PetscCall(PetscLayoutSetUp(C->cmap));
7187   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7188   PetscCall(MatGetOptionsPrefix(C, &prefix));
7189 
7190   cp = 0;
7191   switch (ptype) {
7192   case MATPRODUCT_AB: /* A * P */
7193     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7194 
7195     /* A_diag * P_local (merged or not) */
7196     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7197       /* P is product->B */
7198       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7199       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7200       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7201       PetscCall(MatProductSetFill(mp[cp], product->fill));
7202       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7203       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7204       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7205       mp[cp]->product->api_user = product->api_user;
7206       PetscCall(MatProductSetFromOptions(mp[cp]));
7207       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7208       PetscCall(ISGetIndices(glob, &globidx));
7209       rmapt[cp] = 1;
7210       cmapt[cp] = 2;
7211       cmapa[cp] = globidx;
7212       mptmp[cp] = PETSC_FALSE;
7213       cp++;
7214     } else { /* A_diag * P_diag and A_diag * P_off */
7215       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7216       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7217       PetscCall(MatProductSetFill(mp[cp], product->fill));
7218       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7219       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7220       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7221       mp[cp]->product->api_user = product->api_user;
7222       PetscCall(MatProductSetFromOptions(mp[cp]));
7223       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7224       rmapt[cp] = 1;
7225       cmapt[cp] = 1;
7226       mptmp[cp] = PETSC_FALSE;
7227       cp++;
7228       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7229       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7230       PetscCall(MatProductSetFill(mp[cp], product->fill));
7231       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7232       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7233       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7234       mp[cp]->product->api_user = product->api_user;
7235       PetscCall(MatProductSetFromOptions(mp[cp]));
7236       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7237       rmapt[cp] = 1;
7238       cmapt[cp] = 2;
7239       cmapa[cp] = p->garray;
7240       mptmp[cp] = PETSC_FALSE;
7241       cp++;
7242     }
7243 
7244     /* A_off * P_other */
7245     if (mmdata->P_oth) {
7246       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7247       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7248       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7249       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7250       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7251       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7252       PetscCall(MatProductSetFill(mp[cp], product->fill));
7253       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7254       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7255       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7256       mp[cp]->product->api_user = product->api_user;
7257       PetscCall(MatProductSetFromOptions(mp[cp]));
7258       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7259       rmapt[cp] = 1;
7260       cmapt[cp] = 2;
7261       cmapa[cp] = P_oth_idx;
7262       mptmp[cp] = PETSC_FALSE;
7263       cp++;
7264     }
7265     break;
7266 
7267   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7268     /* A is product->B */
7269     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7270     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7271       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7272       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7273       PetscCall(MatProductSetFill(mp[cp], product->fill));
7274       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7275       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7276       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7277       mp[cp]->product->api_user = product->api_user;
7278       PetscCall(MatProductSetFromOptions(mp[cp]));
7279       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7280       PetscCall(ISGetIndices(glob, &globidx));
7281       rmapt[cp] = 2;
7282       rmapa[cp] = globidx;
7283       cmapt[cp] = 2;
7284       cmapa[cp] = globidx;
7285       mptmp[cp] = PETSC_FALSE;
7286       cp++;
7287     } else {
7288       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7289       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7290       PetscCall(MatProductSetFill(mp[cp], product->fill));
7291       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7292       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7293       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7294       mp[cp]->product->api_user = product->api_user;
7295       PetscCall(MatProductSetFromOptions(mp[cp]));
7296       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7297       PetscCall(ISGetIndices(glob, &globidx));
7298       rmapt[cp] = 1;
7299       cmapt[cp] = 2;
7300       cmapa[cp] = globidx;
7301       mptmp[cp] = PETSC_FALSE;
7302       cp++;
7303       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7304       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7305       PetscCall(MatProductSetFill(mp[cp], product->fill));
7306       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7307       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7308       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7309       mp[cp]->product->api_user = product->api_user;
7310       PetscCall(MatProductSetFromOptions(mp[cp]));
7311       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7312       rmapt[cp] = 2;
7313       rmapa[cp] = p->garray;
7314       cmapt[cp] = 2;
7315       cmapa[cp] = globidx;
7316       mptmp[cp] = PETSC_FALSE;
7317       cp++;
7318     }
7319     break;
7320   case MATPRODUCT_PtAP:
7321     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7322     /* P is product->B */
7323     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7324     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7325     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7326     PetscCall(MatProductSetFill(mp[cp], product->fill));
7327     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7328     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7329     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7330     mp[cp]->product->api_user = product->api_user;
7331     PetscCall(MatProductSetFromOptions(mp[cp]));
7332     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7333     PetscCall(ISGetIndices(glob, &globidx));
7334     rmapt[cp] = 2;
7335     rmapa[cp] = globidx;
7336     cmapt[cp] = 2;
7337     cmapa[cp] = globidx;
7338     mptmp[cp] = PETSC_FALSE;
7339     cp++;
7340     if (mmdata->P_oth) {
7341       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7342       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7343       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7344       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7345       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7346       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7347       PetscCall(MatProductSetFill(mp[cp], product->fill));
7348       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7349       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7350       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7351       mp[cp]->product->api_user = product->api_user;
7352       PetscCall(MatProductSetFromOptions(mp[cp]));
7353       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7354       mptmp[cp] = PETSC_TRUE;
7355       cp++;
7356       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       rmapt[cp] = 2;
7366       rmapa[cp] = globidx;
7367       cmapt[cp] = 2;
7368       cmapa[cp] = P_oth_idx;
7369       mptmp[cp] = PETSC_FALSE;
7370       cp++;
7371     }
7372     break;
7373   default:
7374     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7375   }
7376   /* sanity check */
7377   if (size > 1)
7378     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7379 
7380   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7381   for (i = 0; i < cp; i++) {
7382     mmdata->mp[i]    = mp[i];
7383     mmdata->mptmp[i] = mptmp[i];
7384   }
7385   mmdata->cp             = cp;
7386   C->product->data       = mmdata;
7387   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7388   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7389 
7390   /* memory type */
7391   mmdata->mtype = PETSC_MEMTYPE_HOST;
7392   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7393   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7394   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7395   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7396   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7397   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7398 
7399   /* prepare coo coordinates for values insertion */
7400 
7401   /* count total nonzeros of those intermediate seqaij Mats
7402     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7403     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7404     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7405   */
7406   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7407     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7408     if (mptmp[cp]) continue;
7409     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7410       const PetscInt *rmap = rmapa[cp];
7411       const PetscInt  mr   = mp[cp]->rmap->n;
7412       const PetscInt  rs   = C->rmap->rstart;
7413       const PetscInt  re   = C->rmap->rend;
7414       const PetscInt *ii   = mm->i;
7415       for (i = 0; i < mr; i++) {
7416         const PetscInt gr = rmap[i];
7417         const PetscInt nz = ii[i + 1] - ii[i];
7418         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7419         else ncoo_oown += nz;                  /* this row is local */
7420       }
7421     } else ncoo_d += mm->nz;
7422   }
7423 
7424   /*
7425     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7426 
7427     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7428 
7429     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7430 
7431     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7432     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7433     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7434 
7435     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7436     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7437   */
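  /* For illustration (hypothetical counts): with cp = 2, ncoo_o = 7 and ncoo_oown = 9, off[0] points to an
     index array of length 7 and own[0] to one of length 9; off[1] and own[1] point into those arrays at the
     boundary between the segments of mp[0] and mp[1], and off[2], own[2] point one past the end. */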
7438   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7439   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7440 
7441   /* gather (i,j) of nonzeros inserted by remote procs */
7442   if (hasoffproc) {
7443     PetscSF  msf;
7444     PetscInt ncoo2, *coo_i2, *coo_j2;
7445 
7446     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7447     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7448     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7449 
7450     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7451       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7452       PetscInt   *idxoff = mmdata->off[cp];
7453       PetscInt   *idxown = mmdata->own[cp];
7454       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7455         const PetscInt *rmap = rmapa[cp];
7456         const PetscInt *cmap = cmapa[cp];
7457         const PetscInt *ii   = mm->i;
7458         PetscInt       *coi  = coo_i + ncoo_o;
7459         PetscInt       *coj  = coo_j + ncoo_o;
7460         const PetscInt  mr   = mp[cp]->rmap->n;
7461         const PetscInt  rs   = C->rmap->rstart;
7462         const PetscInt  re   = C->rmap->rend;
7463         const PetscInt  cs   = C->cmap->rstart;
7464         for (i = 0; i < mr; i++) {
7465           const PetscInt *jj = mm->j + ii[i];
7466           const PetscInt  gr = rmap[i];
7467           const PetscInt  nz = ii[i + 1] - ii[i];
7468           if (gr < rs || gr >= re) { /* this is an offproc row */
7469             for (j = ii[i]; j < ii[i + 1]; j++) {
7470               *coi++    = gr;
7471               *idxoff++ = j;
7472             }
7473             if (!cmapt[cp]) { /* already global */
7474               for (j = 0; j < nz; j++) *coj++ = jj[j];
7475             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7476               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7477             } else { /* offdiag */
7478               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7479             }
7480             ncoo_o += nz;
7481           } else { /* this is a local row */
7482             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7483           }
7484         }
7485       }
7486       mmdata->off[cp + 1] = idxoff;
7487       mmdata->own[cp + 1] = idxown;
7488     }
7489 
7490     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7491     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7492     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7493     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7494     ncoo = ncoo_d + ncoo_oown + ncoo2;
7495     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7496     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7497     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7498     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7499     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7500     PetscCall(PetscFree2(coo_i, coo_j));
7501     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7502     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7503     coo_i = coo_i2;
7504     coo_j = coo_j2;
7505   } else { /* no offproc value insertion */
7506     ncoo = ncoo_d;
7507     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7508 
7509     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7510     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7511     PetscCall(PetscSFSetUp(mmdata->sf));
7512   }
7513   mmdata->hasoffproc = hasoffproc;
7514 
7515   /* gather (i,j) of nonzeros inserted locally */
7516   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7517     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7518     PetscInt       *coi  = coo_i + ncoo_d;
7519     PetscInt       *coj  = coo_j + ncoo_d;
7520     const PetscInt *jj   = mm->j;
7521     const PetscInt *ii   = mm->i;
7522     const PetscInt *cmap = cmapa[cp];
7523     const PetscInt *rmap = rmapa[cp];
7524     const PetscInt  mr   = mp[cp]->rmap->n;
7525     const PetscInt  rs   = C->rmap->rstart;
7526     const PetscInt  re   = C->rmap->rend;
7527     const PetscInt  cs   = C->cmap->rstart;
7528 
7529     if (mptmp[cp]) continue;
7530     if (rmapt[cp] == 1) { /* consecutive rows */
7531       /* fill coo_i */
7532       for (i = 0; i < mr; i++) {
7533         const PetscInt gr = i + rs;
7534         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7535       }
7536       /* fill coo_j */
7537       if (!cmapt[cp]) { /* type-0, already global */
7538         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7539       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7540         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7541       } else {                                            /* type-2, local to global for sparse columns */
7542         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7543       }
7544       ncoo_d += mm->nz;
7545     } else if (rmapt[cp] == 2) { /* sparse rows */
7546       for (i = 0; i < mr; i++) {
7547         const PetscInt *jj = mm->j + ii[i];
7548         const PetscInt  gr = rmap[i];
7549         const PetscInt  nz = ii[i + 1] - ii[i];
7550         if (gr >= rs && gr < re) { /* local rows */
7551           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7552           if (!cmapt[cp]) { /* type-0, already global */
7553             for (j = 0; j < nz; j++) *coj++ = jj[j];
7554           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7555             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7556           } else { /* type-2, local to global for sparse columns */
7557             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7558           }
7559           ncoo_d += nz;
7560         }
7561       }
7562     }
7563   }
7564   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7565   PetscCall(ISDestroy(&glob));
7566   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7567   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7568   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7569   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7570 
7571   /* preallocate with COO data */
7572   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7573   PetscCall(PetscFree2(coo_i, coo_j));
7574   PetscFunctionReturn(0);
7575 }
7576 
7577 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7578 {
7579   Mat_Product *product = mat->product;
7580 #if defined(PETSC_HAVE_DEVICE)
7581   PetscBool match  = PETSC_FALSE;
7582   PetscBool usecpu = PETSC_FALSE;
7583 #else
7584   PetscBool match = PETSC_TRUE;
7585 #endif
7586 
7587   PetscFunctionBegin;
7588   MatCheckProduct(mat, 1);
7589 #if defined(PETSC_HAVE_DEVICE)
7590   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7591   if (match) { /* we can always fallback to the CPU if requested */
7592     switch (product->type) {
7593     case MATPRODUCT_AB:
7594       if (product->api_user) {
7595         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7596         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7597         PetscOptionsEnd();
7598       } else {
7599         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7600         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7601         PetscOptionsEnd();
7602       }
7603       break;
7604     case MATPRODUCT_AtB:
7605       if (product->api_user) {
7606         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7607         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7608         PetscOptionsEnd();
7609       } else {
7610         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7611         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7612         PetscOptionsEnd();
7613       }
7614       break;
7615     case MATPRODUCT_PtAP:
7616       if (product->api_user) {
7617         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7618         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7619         PetscOptionsEnd();
7620       } else {
7621         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7622         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7623         PetscOptionsEnd();
7624       }
7625       break;
7626     default:
7627       break;
7628     }
7629     match = (PetscBool)!usecpu;
7630   }
7631 #endif
7632   if (match) {
7633     switch (product->type) {
7634     case MATPRODUCT_AB:
7635     case MATPRODUCT_AtB:
7636     case MATPRODUCT_PtAP:
7637       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7638       break;
7639     default:
7640       break;
7641     }
7642   }
7643   /* fallback to MPIAIJ ops */
7644   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7645   PetscFunctionReturn(0);
7646 }
7647 
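/*
   For example, with device matrices the CPU fallback parsed above can be requested from the options database with
   (depending on whether the user-level API or the MatProduct API was used)

     -matmatmult_backend_cpu           or  -mat_product_algorithm_backend_cpu   (MATPRODUCT_AB)
     -mattransposematmult_backend_cpu  or  -mat_product_algorithm_backend_cpu   (MATPRODUCT_AtB)
     -matptap_backend_cpu              or  -mat_product_algorithm_backend_cpu   (MATPRODUCT_PtAP)
*/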
7648 /*
7649    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7650 
7651    n - the number of block indices in cc[]
7652    cc - the block indices (must be large enough to contain the indices)
7653 */
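/*
   A worked example with hypothetical values: for bs = 2 and a row whose column indices are {0, 1, 4, 5, 7},
   the block column indices are {0, 2, 3}, so on return *n = 3 and cc[] = {0, 2, 3}. The loop below relies on
   MatGetRow() returning the column indices in increasing order, so equal block indices are always adjacent.
*/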
7654 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7655 {
7656   PetscInt        cnt = -1, nidx, j;
7657   const PetscInt *idx;
7658 
7659   PetscFunctionBegin;
7660   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7661   if (nidx) {
7662     cnt     = 0;
7663     cc[cnt] = idx[0] / bs;
7664     for (j = 1; j < nidx; j++) {
7665       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7666     }
7667   }
7668   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7669   *n = cnt + 1;
7670   PetscFunctionReturn(0);
7671 }
7672 
7673 /*
7674     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7675 
7676     ncollapsed - the number of block indices
7677     collapsed - the block indices (must be large enough to contain the indices)
7678 */
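/*
   Continuing the example above with hypothetical values: for bs = 2 and start = 4, if point rows 4 and 5 have
   block column indices {0, 2, 3} and {1, 3} respectively, the merged result for block row 2 is {0, 1, 2, 3},
   i.e. *ncollapsed = 4. The cprev/merged pointers are swapped after each merge so that the latest result becomes
   the "previous" index set for the next point row of the block.
*/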
7679 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7680 {
7681   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7682 
7683   PetscFunctionBegin;
7684   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7685   for (i = start + 1; i < start + bs; i++) {
7686     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7687     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7688     cprevtmp = cprev;
7689     cprev    = merged;
7690     merged   = cprevtmp;
7691   }
7692   *ncollapsed = nprev;
7693   if (collapsed) *collapsed = cprev;
7694   PetscFunctionReturn(0);
7695 }
7696 
7697 /*
7698    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7699 */
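/*
   A minimal usage sketch (hypothetical driver code; Gmat is an assembled AIJ graph):

     Mat filtered;
     PetscCall(MatFilter_AIJ(Gmat, 1e-3, &filtered)); // keep only entries with |Re(a_ij)| > 1e-3
     PetscCall(MatDestroy(&Gmat));
     Gmat = filtered;

   The result is a new matrix owned by the caller; the input matrix is not modified.
*/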
7700 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7701 {
7702   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7703   Mat                tGmat;
7704   MPI_Comm           comm;
7705   const PetscScalar *vals;
7706   const PetscInt    *idx;
7707   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7708   MatScalar         *AA; // this is checked in graph
7709   PetscBool          isseqaij;
7710   Mat                a, b, c;
7711   MatType            jtype;
7712 
7713   PetscFunctionBegin;
7714   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7715   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7716   PetscCall(MatGetType(Gmat, &jtype));
7717   PetscCall(MatCreate(comm, &tGmat));
7718   PetscCall(MatSetType(tGmat, jtype));
7719 
7720   /* TODO GPU: this can be called when filter = 0; perhaps provide a MatAIJThresholdCompress() that compresses the entries below a threshold?
7721                Also, if the matrix is symmetric, can we skip this
7722                operation? It can be very expensive on large matrices. */
7723 
7724   // global sizes
7725   PetscCall(MatGetSize(Gmat, &MM, &NN));
7726   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7727   nloc = Iend - Istart;
7728   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7729   if (isseqaij) {
7730     a = Gmat;
7731     b = NULL;
7732   } else {
7733     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7734     a             = d->A;
7735     b             = d->B;
7736     garray        = d->garray;
7737   }
7738   /* Determine upper bound on non-zeros needed in new filtered matrix */
7739   for (PetscInt row = 0; row < nloc; row++) {
7740     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7741     d_nnz[row] = ncols;
7742     if (ncols > maxcols) maxcols = ncols;
7743     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7744   }
7745   if (b) {
7746     for (PetscInt row = 0; row < nloc; row++) {
7747       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7748       o_nnz[row] = ncols;
7749       if (ncols > maxcols) maxcols = ncols;
7750       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7751     }
7752   }
7753   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7754   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7755   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7756   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7757   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7758   PetscCall(PetscFree2(d_nnz, o_nnz));
7759   //
7760   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7761   nnz0 = nnz1 = 0;
7762   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7763     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7764       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7765       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7766         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7767         if (PetscRealPart(sv) > vfilter) {
7768           nnz1++;
7769           PetscInt cid = idx[jj] + Istart; //diag
7770           if (c != a) cid = garray[idx[jj]];
7771           AA[ncol_row] = vals[jj];
7772           AJ[ncol_row] = cid;
7773           ncol_row++;
7774         }
7775       }
7776       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7777       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7778     }
7779   }
7780   PetscCall(PetscFree2(AA, AJ));
7781   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7782   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7783   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7784 
7785   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7786 
7787   *filteredG = tGmat;
7788   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7789   PetscFunctionReturn(0);
7790 }
7791 
7792 /*
7793  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7794 
7795  Input Parameters:
7796 + Amat - the matrix
7797 . symmetrize - make the result symmetric
7798 . scale - scale with the diagonal
7799 - filter - drop entries below this threshold in absolute value (pass a negative value to disable filtering)
7800 
7801  Output Parameter:
7802 . a_Gmat - output scalar graph with entries >= 0
7802 
7803  */
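/*
   A minimal usage sketch (hypothetical caller; Amat is an assembled (MPI)AIJ matrix, possibly with block size > 1):

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.01, &G)); // symmetrize, scale, filter entries below 0.01
     ...
     PetscCall(MatDestroy(&G));

   Passing a negative filter skips the filtering step; symmetrize and scale may each be PETSC_FALSE independently.
*/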
7804 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7805 {
7806   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7807   MPI_Comm  comm;
7808   Mat       Gmat;
7809   PetscBool ismpiaij, isseqaij;
7810   Mat       a, b, c;
7811   MatType   jtype;
7812 
7813   PetscFunctionBegin;
7814   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7815   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7816   PetscCall(MatGetSize(Amat, &MM, &NN));
7817   PetscCall(MatGetBlockSize(Amat, &bs));
7818   nloc = (Iend - Istart) / bs;
7819 
7820   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7821   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7822   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7823 
7824   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7825   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ(), for which each class could provide a fast
7826      implementation */
7827   if (bs > 1) {
7828     PetscCall(MatGetType(Amat, &jtype));
7829     PetscCall(MatCreate(comm, &Gmat));
7830     PetscCall(MatSetType(Gmat, jtype));
7831     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7832     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7833     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7834       PetscInt  *d_nnz, *o_nnz;
7835       MatScalar *aa, val, AA[4096];
7836       PetscInt  *aj, *ai, AJ[4096], nc;
7837       if (isseqaij) {
7838         a = Amat;
7839         b = NULL;
7840       } else {
7841         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7842         a             = d->A;
7843         b             = d->B;
7844       }
7845       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7846       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7847       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7848         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7849         const PetscInt *cols;
7850         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7851           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
7852           nnz[brow / bs] = jj / bs;
7853           if (jj % bs) ok = 0;
7854           if (cols) j0 = cols[0];
7855           else j0 = -1;
7856           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
7857           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7858           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7859             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
7860             if (jj % bs) ok = 0;
7861             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7862             if (nnz[brow / bs] != jj / bs) ok = 0;
7863             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
7864           }
7865           if (!ok) {
7866             PetscCall(PetscFree2(d_nnz, o_nnz));
7867             goto old_bs;
7868           }
7869         }
7870         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer of size 4096 too small, need %" PetscInt_FMT, nmax);
7871       }
7872       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7873       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7874       PetscCall(PetscFree2(d_nnz, o_nnz));
7875       // diag
7876       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7877         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7878         ai               = aseq->i;
7879         n                = ai[brow + 1] - ai[brow];
7880         aj               = aseq->j + ai[brow];
7881         for (int k = 0; k < n; k += bs) {        // block columns
7882           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7883           val        = 0;
7884           for (int ii = 0; ii < bs; ii++) { // rows in block
7885             aa = aseq->a + ai[brow + ii] + k;
7886             for (int jj = 0; jj < bs; jj++) {         // columns in block
7887               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7888             }
7889           }
7890           AA[k / bs] = val;
7891         }
7892         grow = Istart / bs + brow / bs;
7893         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7894       }
7895       // off-diag
7896       if (ismpiaij) {
7897         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7898         const PetscScalar *vals;
7899         const PetscInt    *cols, *garray = aij->garray;
7900         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7901         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7902           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7903           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7904             AA[k / bs] = 0;
7905             AJ[cidx]   = garray[cols[k]] / bs;
7906           }
7907           nc = ncols / bs;
7908           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7909           for (int ii = 0; ii < bs; ii++) { // rows in block
7910             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7911             for (int k = 0; k < ncols; k += bs) {
7912               for (int jj = 0; jj < bs; jj++) { // cols in block
7913                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7914               }
7915             }
7916             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7917           }
7918           grow = Istart / bs + brow / bs;
7919           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7920         }
7921       }
7922       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7923       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7924     } else {
7925       const PetscScalar *vals;
7926       const PetscInt    *idx;
7927       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7928     old_bs:
7929       /*
7930        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7931        */
7932       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7933       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7934       if (isseqaij) {
7935         PetscInt max_d_nnz;
7936         /*
7937          Determine exact preallocation count for (sequential) scalar matrix
7938          */
7939         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7940         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7941         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7942         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7943         PetscCall(PetscFree3(w0, w1, w2));
7944       } else if (ismpiaij) {
7945         Mat             Daij, Oaij;
7946         const PetscInt *garray;
7947         PetscInt        max_d_nnz;
7948         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7949         /*
7950          Determine exact preallocation count for diagonal block portion of scalar matrix
7951          */
7952         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7953         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7954         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7955         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7956         PetscCall(PetscFree3(w0, w1, w2));
7957         /*
7958          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7959          */
7960         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7961           o_nnz[jj] = 0;
7962           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7963             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7964             o_nnz[jj] += ncols;
7965             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7966           }
7967           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7968         }
7969       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7970       /* get scalar copy (norms) of matrix */
7971       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7972       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7973       PetscCall(PetscFree2(d_nnz, o_nnz));
7974       for (Ii = Istart; Ii < Iend; Ii++) {
7975         PetscInt dest_row = Ii / bs;
7976         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7977         for (jj = 0; jj < ncols; jj++) {
7978           PetscInt    dest_col = idx[jj] / bs;
7979           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7980           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7981         }
7982         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7983       }
7984       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7985       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7986     }
7987   } else {
7988     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7989     else {
7990       Gmat = Amat;
7991       PetscCall(PetscObjectReference((PetscObject)Gmat));
7992     }
7993     if (isseqaij) {
7994       a = Gmat;
7995       b = NULL;
7996     } else {
7997       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7998       a             = d->A;
7999       b             = d->B;
8000     }
8001     if (filter >= 0 || scale) {
8002       /* take absolute value of each entry */
8003       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8004         MatInfo      info;
8005         PetscScalar *avals;
8006         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8007         PetscCall(MatSeqAIJGetArray(c, &avals));
8008         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8009         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8010       }
8011     }
8012   }
8013   if (symmetrize) {
8014     PetscBool isset, issym;
8015     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8016     if (!isset || !issym) {
8017       Mat matTrans;
8018       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8019       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8020       PetscCall(MatDestroy(&matTrans));
8021     }
8022     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8023   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8024   if (scale) {
8025     /* scale Gmat so that all diagonal values are +1 or -1 */
8026     Vec diag;
8027     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8028     PetscCall(MatGetDiagonal(Gmat, diag));
8029     PetscCall(VecReciprocal(diag));
8030     PetscCall(VecSqrtAbs(diag));
8031     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8032     PetscCall(VecDestroy(&diag));
8033   }
8034   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8035 
8036   if (filter >= 0) {
8037     Mat Fmat = NULL; /* some silly compiler needs this */
8038 
8039     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8040     PetscCall(MatDestroy(&Gmat));
8041     Gmat = Fmat;
8042   }
8043   *a_Gmat = Gmat;
8044   PetscFunctionReturn(0);
8045 }
8046 
8047 /*
8048     Special version for direct calls from Fortran
8049 */
8050 #include <petsc/private/fortranimpl.h>
8051 
8052 /* Change these macros so they can be used in a void function */
8053 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8054 #undef PetscCall
8055 #define PetscCall(...) \
8056   do { \
8057     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8058     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8059       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8060       return; \
8061     } \
8062   } while (0)
8063 
8064 #undef SETERRQ
8065 #define SETERRQ(comm, ierr, ...) \
8066   do { \
8067     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8068     return; \
8069   } while (0)
8070 
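/*
   With these redefinitions, inside the void Fortran stub below a failing call such as

     PetscCall(MatSeqAIJGetArray(A, &aa));

   stores the error code in *_ierr (the PetscErrorCode pointer passed in from Fortran) and returns
   immediately, instead of returning an error code as the C versions of these routines do.
*/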
8071 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8072   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8073 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8074   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8075 #else
8076 #endif
8077 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8078 {
8079   Mat         mat = *mmat;
8080   PetscInt    m = *mm, n = *mn;
8081   InsertMode  addv = *maddv;
8082   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8083   PetscScalar value;
8084 
8085   MatCheckPreallocated(mat, 1);
8086   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8087   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8088   {
8089     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8090     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8091     PetscBool roworiented = aij->roworiented;
8092 
8093     /* Some Variables required in the macro */
8094     Mat         A     = aij->A;
8095     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8096     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8097     MatScalar  *aa;
8098     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8099     Mat         B                 = aij->B;
8100     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8101     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8102     MatScalar  *ba;
8103     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8104      * cannot use "#if defined" inside a macro. */
8105     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8106 
8107     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8108     PetscInt   nonew = a->nonew;
8109     MatScalar *ap1, *ap2;
8110 
8111     PetscFunctionBegin;
8112     PetscCall(MatSeqAIJGetArray(A, &aa));
8113     PetscCall(MatSeqAIJGetArray(B, &ba));
8114     for (i = 0; i < m; i++) {
8115       if (im[i] < 0) continue;
8116       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8117       if (im[i] >= rstart && im[i] < rend) {
8118         row      = im[i] - rstart;
8119         lastcol1 = -1;
8120         rp1      = aj + ai[row];
8121         ap1      = aa + ai[row];
8122         rmax1    = aimax[row];
8123         nrow1    = ailen[row];
8124         low1     = 0;
8125         high1    = nrow1;
8126         lastcol2 = -1;
8127         rp2      = bj + bi[row];
8128         ap2      = ba + bi[row];
8129         rmax2    = bimax[row];
8130         nrow2    = bilen[row];
8131         low2     = 0;
8132         high2    = nrow2;
8133 
8134         for (j = 0; j < n; j++) {
8135           if (roworiented) value = v[i * n + j];
8136           else value = v[i + j * m];
8137           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8138           if (in[j] >= cstart && in[j] < cend) {
8139             col = in[j] - cstart;
8140             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8141           } else if (in[j] < 0) continue;
8142           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8143             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
8144             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8145           } else {
8146             if (mat->was_assembled) {
8147               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8148 #if defined(PETSC_USE_CTABLE)
8149               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8150               col--;
8151 #else
8152               col = aij->colmap[in[j]] - 1;
8153 #endif
8154               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8155                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8156                 col = in[j];
8157                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8158                 B        = aij->B;
8159                 b        = (Mat_SeqAIJ *)B->data;
8160                 bimax    = b->imax;
8161                 bi       = b->i;
8162                 bilen    = b->ilen;
8163                 bj       = b->j;
8164                 rp2      = bj + bi[row];
8165                 ap2      = ba + bi[row];
8166                 rmax2    = bimax[row];
8167                 nrow2    = bilen[row];
8168                 low2     = 0;
8169                 high2    = nrow2;
8170                 bm       = aij->B->rmap->n;
8171                 ba       = b->a;
8172                 inserted = PETSC_FALSE;
8173               }
8174             } else col = in[j];
8175             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8176           }
8177         }
8178       } else if (!aij->donotstash) {
8179         if (roworiented) {
8180           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8181         } else {
8182           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8183         }
8184       }
8185     }
8186     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8187     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8188   }
8189   PetscFunctionReturnVoid();
8190 }
8191 
8192 /* Undefining these here since they were redefined from their original definition above! No
8193  * other PETSc functions should be defined past this point, as it is impossible to recover the
8194  * original definitions */
8195 #undef PetscCall
8196 #undef SETERRQ
8197