xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f66216f923c17d4e70adfb7fbce8bc2d3739ff05)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
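  /* Build the local rows of A as a single sequential matrix, compose it on A so that MatRestoreRowIJ_MPIAIJ() can retrieve it
     (the composition keeps it alive after the MatDestroy() below), and return that matrix's row/column index arrays */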
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118    for communicators controlling multiple processes.  It is recommended that you call both of
119    the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
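  Example Usage:
  A minimal sketch of creating an AIJ matrix and calling both preallocation routines as recommended above (the sizes and
  the per-row nonzero estimates are illustrative only):
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
  PetscCall(MatSetType(A, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          /* used when the communicator has a single process */
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); /* used when the communicator has multiple processes */
  /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix, MatDestroy() ... */
.ve
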
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; this type also
128   automatically switches over to using inodes when enough of them exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
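  Example Usage:
  A minimal sketch of selecting this format programmatically (it assumes `A` has already been created and its sizes set):
.vb
  PetscCall(MatSetType(A, MATAIJCRL));
  /* or run with -mat_type aijcrl and call MatSetFromOptions(A) */
.ve
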
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
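  /* first pass: count the locally owned rows that are entirely zero (structurally empty or with all stored values equal to zero) */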
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
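  /* second pass: record the global indices of the rows that contain at least one nonzero value */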
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
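  /* the dummy get/restore pairs below force any values that currently live on a device to be synchronized to the host,
     since a_aij->a and b_aij->a are accessed directly in the loops that follow */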
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
362 a slightly higher hash-table lookup cost; without it, it is not scalable (each
363 process stores an order-N integer array) but access is fast.
364 */
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
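/* Insert (or add) value at (row, col) in the diagonal block A: binary-search the row for col and update it in place if found;
   otherwise, if new nonzeros are allowed, grow the row (reallocating when necessary) and shift the later entries to make room */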
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
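/* Same insertion logic as MatSetValues_SeqAIJ_A_Private() above, but applied to the off-diagonal block B */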
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some Variables required in the macro */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba may have been freed due to reallocation above, but we do not access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The row offsets in mat_i have to be sorted and the column indices in mat_j have to be sorted within each row (CSR-like).
621     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any processor has disassembled; if so we must
797      also disassemble ourselves, in order that we may reassemble. */
798   /*
799      if nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled thus we can skip this stuff
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
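  /* mark the global indices being zeroed in a vector with the matrix's column layout and scatter it into the ghost-column
     layout of B, so each rank can tell which local columns of its off-diagonal block must be eliminated */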
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layout don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
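  /* overlap communication with computation: start gathering the off-process entries of xx into lvec, apply the diagonal
     block while the scatter is in flight, then complete the scatter and add in the off-diagonal block contribution */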
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This requires calls to MatCreateSubMatrices(). */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
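  /* for each row, emit the off-diagonal entries whose global column precedes the diagonal block, then the diagonal-block
     entries, then the remaining off-diagonal entries, so the column indices of each row are written in increasing global order */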
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /*  The commented code uses MatCreateSubMatrices instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Everyone has to call to draw the matrix since the graphics waits are
1357        synchronized across all processors that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
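/*
   Only the "local" SOR variants are supported: each outer iteration scatters the current solution
   into the ghost vector lvec, updates the right-hand side as bb1 = bb - B*x (B being the
   off-diagonal block), and applies sequential SOR to the local diagonal block A. Requests for a
   true parallel sweep fall through to the unsupported-operation error at the end of the routine.
*/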
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
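  /* A work copy of the right-hand side is needed whenever it will be modified below: more than
     one outer iteration, a nonzero initial guess (~flag & SOR_ZERO_INITIAL_GUESS is nonzero
     exactly when the zero-initial-guess bit is NOT set), or the Eisenstat variant */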
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
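/*
   Forms a new matrix whose rows and columns of A are permuted according to rowp and colp. The
   permutations are inverted with star forests (PetscSF) to find where each locally owned row,
   column, and ghost column ends up; the diagonal/off-diagonal nonzero counts of the permuted
   matrix are then communicated back for preallocation, and the entries are inserted with
   MatSetValues().
*/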
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the local diagonal block is square it inherits some of the properties above */
1689     break;
1690   case MAT_SUBMAT_SINGLEIS:
1691     A->submat_singleis = flg;
1692     break;
1693   case MAT_STRUCTURE_ONLY:
1694     /* The option is handled directly by MatSetOption() */
1695     break;
1696   default:
1697     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1698   }
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1703 {
1704   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1705   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1706   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1707   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1708   PetscInt    *cmap, *idx_p;
1709 
1710   PetscFunctionBegin;
1711   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1712   mat->getrowactive = PETSC_TRUE;
1713 
1714   if (!mat->rowvalues && (idx || v)) {
1715     /*
1716         allocate enough space to hold information from the longest row.
1717     */
1718     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1719     PetscInt    max = 1, tmp;
1720     for (i = 0; i < matin->rmap->n; i++) {
1721       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1722       if (max < tmp) max = tmp;
1723     }
1724     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1725   }
1726 
1727   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1728   lrow = row - rstart;
1729 
1730   pvA = &vworkA;
1731   pcA = &cworkA;
1732   pvB = &vworkB;
1733   pcB = &cworkB;
1734   if (!v) {
1735     pvA = NULL;
1736     pvB = NULL;
1737   }
1738   if (!idx) {
1739     pcA = NULL;
1740     if (!v) pcB = NULL;
1741   }
1742   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1743   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1744   nztot = nzA + nzB;
1745 
1746   cmap = mat->garray;
1747   if (v || idx) {
1748     if (nztot) {
1749       /* Sort by increasing column numbers, assuming A and B already sorted */
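      /* Off-diagonal (B) entries with global column < cstart come first, then all diagonal (A)
         entries shifted by cstart, then the remaining B entries; imark records where B is split */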
1750       PetscInt imark = -1;
1751       if (v) {
1752         *v = v_p = mat->rowvalues;
1753         for (i = 0; i < nzB; i++) {
1754           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1755           else break;
1756         }
1757         imark = i;
1758         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1759         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1760       }
1761       if (idx) {
1762         *idx = idx_p = mat->rowindices;
1763         if (imark > -1) {
1764           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1765         } else {
1766           for (i = 0; i < nzB; i++) {
1767             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1768             else break;
1769           }
1770           imark = i;
1771         }
1772         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1773         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1774       }
1775     } else {
1776       if (idx) *idx = NULL;
1777       if (v) *v = NULL;
1778     }
1779   }
1780   *nz = nztot;
1781   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1782   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1783   PetscFunctionReturn(PETSC_SUCCESS);
1784 }
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1797 {
1798   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1799   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1800   PetscInt         i, j, cstart = mat->cmap->rstart;
1801   PetscReal        sum = 0.0;
1802   const MatScalar *v, *amata, *bmata;
1803   PetscMPIInt      iN;
1804 
1805   PetscFunctionBegin;
1806   if (aij->size == 1) {
1807     PetscCall(MatNorm(aij->A, type, norm));
1808   } else {
1809     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1810     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1811     if (type == NORM_FROBENIUS) {
1812       v = amata;
1813       for (i = 0; i < amat->nz; i++) {
1814         sum += PetscRealPart(PetscConj(*v) * (*v));
1815         v++;
1816       }
1817       v = bmata;
1818       for (i = 0; i < bmat->nz; i++) {
1819         sum += PetscRealPart(PetscConj(*v) * (*v));
1820         v++;
1821       }
1822       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1823       *norm = PetscSqrtReal(*norm);
1824       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1825     } else if (type == NORM_1) { /* max column norm */
1826       PetscReal *tmp, *tmp2;
1827       PetscInt  *jj, *garray = aij->garray;
1828       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1829       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1830       *norm = 0.0;
1831       v     = amata;
1832       jj    = amat->j;
1833       for (j = 0; j < amat->nz; j++) {
1834         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1835         v++;
1836       }
1837       v  = bmata;
1838       jj = bmat->j;
1839       for (j = 0; j < bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1841         v++;
1842       }
1843       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1844       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1845       for (j = 0; j < mat->cmap->N; j++) {
1846         if (tmp2[j] > *norm) *norm = tmp2[j];
1847       }
1848       PetscCall(PetscFree(tmp));
1849       PetscCall(PetscFree(tmp2));
1850       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1851     } else if (type == NORM_INFINITY) { /* max row norm */
1852       PetscReal ntemp = 0.0;
1853       for (j = 0; j < aij->A->rmap->n; j++) {
1854         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1855         sum = 0.0;
1856         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1857           sum += PetscAbsScalar(*v);
1858           v++;
1859         }
1860         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1861         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1862           sum += PetscAbsScalar(*v);
1863           v++;
1864         }
1865         if (sum > ntemp) ntemp = sum;
1866       }
1867       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1868       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1869     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1870     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
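/*
   The transpose is assembled in two pieces: the local diagonal block is transposed directly into
   the new diagonal block (all writes are local, so MatSetValues() is not needed there), while the
   entries of the off-diagonal block are inserted with global row/column indices through
   MatSetValues() and migrate to their owning processes during assembly.
*/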
1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1877 {
1878   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1879   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1880   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1881   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1882   Mat              B, A_diag, *B_diag;
1883   const MatScalar *pbv, *bv;
1884 
1885   PetscFunctionBegin;
1886   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1887   ma = A->rmap->n;
1888   na = A->cmap->n;
1889   mb = a->B->rmap->n;
1890   nb = a->B->cmap->n;
1891   ai = Aloc->i;
1892   aj = Aloc->j;
1893   bi = Bloc->i;
1894   bj = Bloc->j;
1895   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1896     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1897     PetscSFNode         *oloc;
1898     PETSC_UNUSED PetscSF sf;
1899 
1900     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1901     /* compute d_nnz for preallocation */
1902     PetscCall(PetscArrayzero(d_nnz, na));
1903     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1904     /* compute local off-diagonal contributions */
1905     PetscCall(PetscArrayzero(g_nnz, nb));
1906     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1907     /* map those to global */
1908     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1909     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1910     PetscCall(PetscSFSetFromOptions(sf));
1911     PetscCall(PetscArrayzero(o_nnz, na));
1912     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1913     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFDestroy(&sf));
1915 
1916     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1917     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1918     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1919     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1920     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1921     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1922   } else {
1923     B = *matout;
1924     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1925   }
1926 
1927   b           = (Mat_MPIAIJ *)B->data;
1928   A_diag      = a->A;
1929   B_diag      = &b->A;
1930   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1931   A_diag_ncol = A_diag->cmap->N;
1932   B_diag_ilen = sub_B_diag->ilen;
1933   B_diag_i    = sub_B_diag->i;
1934 
1935   /* Set ilen for diagonal of B */
1936   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1937 
1938   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1939   very quickly (=without using MatSetValues), because all writes are local. */
1940   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1941   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1942 
1943   /* copy over the B part */
1944   PetscCall(PetscMalloc1(bi[mb], &cols));
1945   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1946   pbv = bv;
1947   row = A->rmap->rstart;
1948   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1949   cols_tmp = cols;
1950   for (i = 0; i < mb; i++) {
1951     ncol = bi[i + 1] - bi[i];
1952     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1953     row++;
1954     if (pbv) pbv += ncol;
1955     if (cols_tmp) cols_tmp += ncol;
1956   }
1957   PetscCall(PetscFree(cols));
1958   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1959 
1960   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1961   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1962   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1963     *matout = B;
1964   } else {
1965     PetscCall(MatHeaderMerge(A, &B));
1966   }
1967   PetscFunctionReturn(PETSC_SUCCESS);
1968 }
1969 
1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1971 {
1972   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1973   Mat         a = aij->A, b = aij->B;
1974   PetscInt    s1, s2, s3;
1975 
1976   PetscFunctionBegin;
1977   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1978   if (rr) {
1979     PetscCall(VecGetLocalSize(rr, &s1));
1980     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1981     /* Overlap communication with computation. */
1982     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1983   }
1984   if (ll) {
1985     PetscCall(VecGetLocalSize(ll, &s1));
1986     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1987     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1988   }
1989   /* scale  the diagonal block */
1990   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1991 
1992   if (rr) {
1993     /* Do a scatter end and then right scale the off-diagonal block */
1994     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1995     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2001 {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003 
2004   PetscFunctionBegin;
2005   PetscCall(MatSetUnfactored(a->A));
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2010 {
2011   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2012   Mat         a, b, c, d;
2013   PetscBool   flg;
2014 
2015   PetscFunctionBegin;
2016   a = matA->A;
2017   b = matA->B;
2018   c = matB->A;
2019   d = matB->B;
2020 
2021   PetscCall(MatEqual(a, c, &flg));
2022   if (flg) PetscCall(MatEqual(b, d, &flg));
2023   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2028 {
2029   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2030   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2031 
2032   PetscFunctionBegin;
2033   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2034   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2035     /* because of the column compression in the off-processor part of the matrix a->B,
2036        the number of columns in a->B and b->B may be different, hence we cannot call
2037        MatCopy() directly on the two parts. If need be, a more efficient copy than
2038        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2039        then copying the submatrices */
2040     PetscCall(MatCopy_Basic(A, B, str));
2041   } else {
2042     PetscCall(MatCopy(a->A, b->A, str));
2043     PetscCall(MatCopy(a->B, b->B, str));
2044   }
2045   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 /*
2050    Computes the number of nonzeros per row needed for preallocation when X and Y
2051    have different nonzero structure.
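   For example (an illustrative case): if row i of X has global columns {0, 3, 5} and row i of Y
   has {3, 4}, the union is {0, 3, 4, 5}, so nnz[i] = 4.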
2052 */
2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2054 {
2055   PetscInt i, j, k, nzx, nzy;
2056 
2057   PetscFunctionBegin;
2058   /* Set the number of nonzeros in the new matrix */
2059   for (i = 0; i < m; i++) {
2060     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2061     nzx    = xi[i + 1] - xi[i];
2062     nzy    = yi[i + 1] - yi[i];
2063     nnz[i] = 0;
2064     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2065       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2066       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2067       nnz[i]++;
2068     }
2069     for (; k < nzy; k++) nnz[i]++;
2070   }
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2076 {
2077   PetscInt    m = Y->rmap->N;
2078   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2079   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2080 
2081   PetscFunctionBegin;
2082   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
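/*
   Y = Y + a*X. With SAME_NONZERO_PATTERN both blocks are updated in place; with
   SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic() is used; otherwise a new matrix holding the
   union of the two nonzero patterns is preallocated, filled by MatAXPY_BasicWithPreallocation(),
   and merged back into Y.
*/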
2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2087 {
2088   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   if (str == SAME_NONZERO_PATTERN) {
2092     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2093     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2094   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2095     PetscCall(MatAXPY_Basic(Y, a, X, str));
2096   } else {
2097     Mat       B;
2098     PetscInt *nnz_d, *nnz_o;
2099 
2100     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2101     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2102     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2103     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2104     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2105     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2106     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2107     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2108     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2109     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2110     PetscCall(MatHeaderMerge(Y, &B));
2111     PetscCall(PetscFree(nnz_d));
2112     PetscCall(PetscFree(nnz_o));
2113   }
2114   PetscFunctionReturn(PETSC_SUCCESS);
2115 }
2116 
2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2118 
2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2120 {
2121   PetscFunctionBegin;
2122   if (PetscDefined(USE_COMPLEX)) {
2123     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2124 
2125     PetscCall(MatConjugate_SeqAIJ(aij->A));
2126     PetscCall(MatConjugate_SeqAIJ(aij->B));
2127   }
2128   PetscFunctionReturn(PETSC_SUCCESS);
2129 }
2130 
2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2134 
2135   PetscFunctionBegin;
2136   PetscCall(MatRealPart(a->A));
2137   PetscCall(MatRealPart(a->B));
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatImaginaryPart(a->A));
2147   PetscCall(MatImaginaryPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2152 {
2153   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2154   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2155   PetscScalar       *vv;
2156   Vec                vB, vA;
2157   const PetscScalar *va, *vb;
2158 
2159   PetscFunctionBegin;
2160   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2161   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2162 
2163   PetscCall(VecGetArrayRead(vA, &va));
2164   if (idx) {
2165     for (i = 0; i < m; i++) {
2166       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2167     }
2168   }
2169 
2170   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2171   PetscCall(PetscMalloc1(m, &idxb));
2172   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2173 
2174   PetscCall(VecGetArrayWrite(v, &vv));
2175   PetscCall(VecGetArrayRead(vB, &vb));
2176   for (i = 0; i < m; i++) {
2177     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2178       vv[i] = vb[i];
2179       if (idx) idx[i] = a->garray[idxb[i]];
2180     } else {
2181       vv[i] = va[i];
2182       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2183     }
2184   }
2185   PetscCall(VecRestoreArrayWrite(v, &vv));
2186   PetscCall(VecRestoreArrayRead(vA, &va));
2187   PetscCall(VecRestoreArrayRead(vB, &vb));
2188   PetscCall(PetscFree(idxb));
2189   PetscCall(VecDestroy(&vA));
2190   PetscCall(VecDestroy(&vB));
2191   PetscFunctionReturn(PETSC_SUCCESS);
2192 }
2193 
2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2195 {
2196   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2197   Vec         vB, vA;
2198 
2199   PetscFunctionBegin;
2200   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2201   PetscCall(MatGetRowSumAbs(a->A, vA));
2202   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2203   PetscCall(MatGetRowSumAbs(a->B, vB));
2204   PetscCall(VecAXPY(vA, 1.0, vB));
2205   PetscCall(VecDestroy(&vB));
2206   PetscCall(VecCopy(vA, v));
2207   PetscCall(VecDestroy(&vA));
2208   PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210 
2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2212 {
2213   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2214   PetscInt           m = A->rmap->n, n = A->cmap->n;
2215   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2216   PetscInt          *cmap = mat->garray;
2217   PetscInt          *diagIdx, *offdiagIdx;
2218   Vec                diagV, offdiagV;
2219   PetscScalar       *a, *diagA, *offdiagA;
2220   const PetscScalar *ba, *bav;
2221   PetscInt           r, j, col, ncols, *bi, *bj;
2222   Mat                B = mat->B;
2223   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2224 
2225   PetscFunctionBegin;
2226   /* When a process holds entire A and other processes have no entry */
2227   if (A->cmap->N == n) {
2228     PetscCall(VecGetArrayWrite(v, &diagA));
2229     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2230     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2231     PetscCall(VecDestroy(&diagV));
2232     PetscCall(VecRestoreArrayWrite(v, &diagA));
2233     PetscFunctionReturn(PETSC_SUCCESS);
2234   } else if (n == 0) {
2235     if (m) {
2236       PetscCall(VecGetArrayWrite(v, &a));
2237       for (r = 0; r < m; r++) {
2238         a[r] = 0.0;
2239         if (idx) idx[r] = -1;
2240       }
2241       PetscCall(VecRestoreArrayWrite(v, &a));
2242     }
2243     PetscFunctionReturn(PETSC_SUCCESS);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
2252   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2253   ba = bav;
2254   bi = b->i;
2255   bj = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r + 1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r]   = *ba;
2261       offdiagIdx[r] = cmap[0];
2262     } else { /* Brow is sparse, so the row contains an implicit 0.0 and hence the minimum absolute value seen so far is 0.0 */
2263       offdiagA[r] = 0.0;
2264 
2265       /* Find first hole in the cmap */
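      /* cmap[bj[]] is increasing, and the candidate off-process global columns in increasing
         order are 0,...,cstart-1,cend,...,N-1, i.e. candidate j is j for j < cstart and j + n
         otherwise; the first stored column that exceeds its candidate marks an implicit 0.0 */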
2266       for (j = 0; j < ncols; j++) {
2267         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2268         if (col > j && j < cstart) {
2269           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2270           break;
2271         } else if (col > j + n && j >= cstart) {
2272           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2273           break;
2274         }
2275       }
2276       if (j == ncols && ncols < A->cmap->N - n) {
2277         /* a hole is outside compressed Bcols */
2278         if (ncols == 0) {
2279           if (cstart) {
2280             offdiagIdx[r] = 0;
2281           } else offdiagIdx[r] = cend;
2282         } else { /* ncols > 0 */
2283           offdiagIdx[r] = cmap[ncols - 1] + 1;
2284           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2285         }
2286       }
2287     }
2288 
2289     for (j = 0; j < ncols; j++) {
2290       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2291         offdiagA[r]   = *ba;
2292         offdiagIdx[r] = cmap[*bj];
2293       }
2294       ba++;
2295       bj++;
2296     }
2297   }
2298 
2299   PetscCall(VecGetArrayWrite(v, &a));
2300   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2301   for (r = 0; r < m; ++r) {
2302     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2303       a[r] = diagA[r];
2304       if (idx) idx[r] = cstart + diagIdx[r];
2305     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2306       a[r] = diagA[r];
2307       if (idx) {
2308         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2309           idx[r] = cstart + diagIdx[r];
2310         } else idx[r] = offdiagIdx[r];
2311       }
2312     } else {
2313       a[r] = offdiagA[r];
2314       if (idx) idx[r] = offdiagIdx[r];
2315     }
2316   }
2317   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2318   PetscCall(VecRestoreArrayWrite(v, &a));
2319   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2320   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2321   PetscCall(VecDestroy(&diagV));
2322   PetscCall(VecDestroy(&offdiagV));
2323   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2330   PetscInt           m = A->rmap->n, n = A->cmap->n;
2331   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2332   PetscInt          *cmap = mat->garray;
2333   PetscInt          *diagIdx, *offdiagIdx;
2334   Vec                diagV, offdiagV;
2335   PetscScalar       *a, *diagA, *offdiagA;
2336   const PetscScalar *ba, *bav;
2337   PetscInt           r, j, col, ncols, *bi, *bj;
2338   Mat                B = mat->B;
2339   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2340 
2341   PetscFunctionBegin;
2342   /* When a process holds entire A and other processes have no entry */
2343   if (A->cmap->N == n) {
2344     PetscCall(VecGetArrayWrite(v, &diagA));
2345     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2346     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2347     PetscCall(VecDestroy(&diagV));
2348     PetscCall(VecRestoreArrayWrite(v, &diagA));
2349     PetscFunctionReturn(PETSC_SUCCESS);
2350   } else if (n == 0) {
2351     if (m) {
2352       PetscCall(VecGetArrayWrite(v, &a));
2353       for (r = 0; r < m; r++) {
2354         a[r] = PETSC_MAX_REAL;
2355         if (idx) idx[r] = -1;
2356       }
2357       PetscCall(VecRestoreArrayWrite(v, &a));
2358     }
2359     PetscFunctionReturn(PETSC_SUCCESS);
2360   }
2361 
2362   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2363   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2365   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2366 
2367   /* Get offdiagIdx[] for implicit 0.0 */
2368   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2369   ba = bav;
2370   bi = b->i;
2371   bj = b->j;
2372   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2373   for (r = 0; r < m; r++) {
2374     ncols = bi[r + 1] - bi[r];
2375     if (ncols == A->cmap->N - n) { /* Brow is dense */
2376       offdiagA[r]   = *ba;
2377       offdiagIdx[r] = cmap[0];
2378     } else { /* Brow is sparse, so the row contains an implicit 0.0 and hence the minimum seen so far is at most 0.0 */
2379       offdiagA[r] = 0.0;
2380 
2381       /* Find first hole in the cmap */
2382       for (j = 0; j < ncols; j++) {
2383         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2384         if (col > j && j < cstart) {
2385           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2386           break;
2387         } else if (col > j + n && j >= cstart) {
2388           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2389           break;
2390         }
2391       }
2392       if (j == ncols && ncols < A->cmap->N - n) {
2393         /* a hole is outside compressed Bcols */
2394         if (ncols == 0) {
2395           if (cstart) {
2396             offdiagIdx[r] = 0;
2397           } else offdiagIdx[r] = cend;
2398         } else { /* ncols > 0 */
2399           offdiagIdx[r] = cmap[ncols - 1] + 1;
2400           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2401         }
2402       }
2403     }
2404 
2405     for (j = 0; j < ncols; j++) {
2406       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2407         offdiagA[r]   = *ba;
2408         offdiagIdx[r] = cmap[*bj];
2409       }
2410       ba++;
2411       bj++;
2412     }
2413   }
2414 
2415   PetscCall(VecGetArrayWrite(v, &a));
2416   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2417   for (r = 0; r < m; ++r) {
2418     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2419       a[r] = diagA[r];
2420       if (idx) idx[r] = cstart + diagIdx[r];
2421     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2422       a[r] = diagA[r];
2423       if (idx) {
2424         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2425           idx[r] = cstart + diagIdx[r];
2426         } else idx[r] = offdiagIdx[r];
2427       }
2428     } else {
2429       a[r] = offdiagA[r];
2430       if (idx) idx[r] = offdiagIdx[r];
2431     }
2432   }
2433   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2434   PetscCall(VecRestoreArrayWrite(v, &a));
2435   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2436   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2437   PetscCall(VecDestroy(&diagV));
2438   PetscCall(VecDestroy(&offdiagV));
2439   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2440   PetscFunctionReturn(PETSC_SUCCESS);
2441 }
2442 
2443 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2444 {
2445   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2446   PetscInt           m = A->rmap->n, n = A->cmap->n;
2447   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2448   PetscInt          *cmap = mat->garray;
2449   PetscInt          *diagIdx, *offdiagIdx;
2450   Vec                diagV, offdiagV;
2451   PetscScalar       *a, *diagA, *offdiagA;
2452   const PetscScalar *ba, *bav;
2453   PetscInt           r, j, col, ncols, *bi, *bj;
2454   Mat                B = mat->B;
2455   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2456 
2457   PetscFunctionBegin;
2458   /* When a process holds entire A and other processes have no entry */
2459   if (A->cmap->N == n) {
2460     PetscCall(VecGetArrayWrite(v, &diagA));
2461     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2462     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2463     PetscCall(VecDestroy(&diagV));
2464     PetscCall(VecRestoreArrayWrite(v, &diagA));
2465     PetscFunctionReturn(PETSC_SUCCESS);
2466   } else if (n == 0) {
2467     if (m) {
2468       PetscCall(VecGetArrayWrite(v, &a));
2469       for (r = 0; r < m; r++) {
2470         a[r] = PETSC_MIN_REAL;
2471         if (idx) idx[r] = -1;
2472       }
2473       PetscCall(VecRestoreArrayWrite(v, &a));
2474     }
2475     PetscFunctionReturn(PETSC_SUCCESS);
2476   }
2477 
2478   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2479   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2481   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2482 
2483   /* Get offdiagIdx[] for implicit 0.0 */
2484   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2485   ba = bav;
2486   bi = b->i;
2487   bj = b->j;
2488   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2489   for (r = 0; r < m; r++) {
2490     ncols = bi[r + 1] - bi[r];
2491     if (ncols == A->cmap->N - n) { /* Brow is dense */
2492       offdiagA[r]   = *ba;
2493       offdiagIdx[r] = cmap[0];
2494     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2495       offdiagA[r] = 0.0;
2496 
2497       /* Find first hole in the cmap */
2498       for (j = 0; j < ncols; j++) {
2499         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2500         if (col > j && j < cstart) {
2501           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2502           break;
2503         } else if (col > j + n && j >= cstart) {
2504           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2505           break;
2506         }
2507       }
2508       if (j == ncols && ncols < A->cmap->N - n) {
2509         /* a hole is outside compressed Bcols */
2510         if (ncols == 0) {
2511           if (cstart) {
2512             offdiagIdx[r] = 0;
2513           } else offdiagIdx[r] = cend;
2514         } else { /* ncols > 0 */
2515           offdiagIdx[r] = cmap[ncols - 1] + 1;
2516           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2517         }
2518       }
2519     }
2520 
2521     for (j = 0; j < ncols; j++) {
2522       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2523         offdiagA[r]   = *ba;
2524         offdiagIdx[r] = cmap[*bj];
2525       }
2526       ba++;
2527       bj++;
2528     }
2529   }
2530 
2531   PetscCall(VecGetArrayWrite(v, &a));
2532   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2533   for (r = 0; r < m; ++r) {
2534     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2535       a[r] = diagA[r];
2536       if (idx) idx[r] = cstart + diagIdx[r];
2537     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2538       a[r] = diagA[r];
2539       if (idx) {
2540         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2541           idx[r] = cstart + diagIdx[r];
2542         } else idx[r] = offdiagIdx[r];
2543       }
2544     } else {
2545       a[r] = offdiagA[r];
2546       if (idx) idx[r] = offdiagIdx[r];
2547     }
2548   }
2549   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2550   PetscCall(VecRestoreArrayWrite(v, &a));
2551   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2552   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2553   PetscCall(VecDestroy(&diagV));
2554   PetscCall(VecDestroy(&offdiagV));
2555   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2556   PetscFunctionReturn(PETSC_SUCCESS);
2557 }
2558 
2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2560 {
2561   Mat *dummy;
2562 
2563   PetscFunctionBegin;
2564   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2565   *newmat = *dummy;
2566   PetscCall(PetscFree(dummy));
2567   PetscFunctionReturn(PETSC_SUCCESS);
2568 }
2569 
2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2571 {
2572   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573 
2574   PetscFunctionBegin;
2575   PetscCall(MatInvertBlockDiagonal(a->A, values));
2576   A->factorerrortype = a->A->factorerrortype;
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2581 {
2582   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2583 
2584   PetscFunctionBegin;
2585   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2586   PetscCall(MatSetRandom(aij->A, rctx));
2587   if (x->assembled) {
2588     PetscCall(MatSetRandom(aij->B, rctx));
2589   } else {
2590     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2591   }
2592   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2593   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2598 {
2599   PetscFunctionBegin;
2600   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2601   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 /*@
2606   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2607 
2608   Not Collective
2609 
2610   Input Parameter:
2611 . A - the matrix
2612 
2613   Output Parameter:
2614 . nz - the number of nonzeros
2615 
2616   Level: advanced
2617 
2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2619 @*/
2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2621 {
2622   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2623   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2624   PetscBool   isaij;
2625 
2626   PetscFunctionBegin;
2627   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2628   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2629   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2630   PetscFunctionReturn(PETSC_SUCCESS);
2631 }
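/*
   A minimal usage sketch (not part of this source), assuming `A` is an assembled MATMPIAIJ matrix
   created elsewhere; the count is per MPI rank (diagonal plus off-diagonal block) and is not reduced
   over the communicator:

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
     PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz));
*/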
2632 
2633 /*@
2634   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2635 
2636   Collective
2637 
2638   Input Parameters:
2639 + A  - the matrix
2640 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (the default is to not use it)
2641 
2642   Level: advanced
2643 
2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2645 @*/
2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2647 {
2648   PetscFunctionBegin;
2649   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
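/*
   A minimal usage sketch (not part of this source): the scalable overlap algorithm can be selected
   either programmatically,

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

   or, assuming MatSetFromOptions() is called on the matrix, through the option registered in
   MatSetFromOptions_MPIAIJ() below,

     -mat_increase_overlap_scalable

   The setting only changes which implementation MatIncreaseOverlap() dispatches to for this matrix.
*/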
2652 
2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2654 {
2655   PetscBool sc = PETSC_FALSE, flg;
2656 
2657   PetscFunctionBegin;
2658   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2659   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2660   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2661   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2662   PetscOptionsHeadEnd();
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
2665 
2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2667 {
2668   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2669   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2670 
2671   PetscFunctionBegin;
2672   if (!Y->preallocated) {
2673     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2674   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2675     PetscInt nonew = aij->nonew;
2676     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2677     aij->nonew = nonew;
2678   }
2679   PetscCall(MatShift_Basic(Y, a));
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2684 {
2685   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2686 
2687   PetscFunctionBegin;
2688   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2689   PetscCall(MatMissingDiagonal(a->A, missing, d));
2690   if (d) {
2691     PetscInt rstart;
2692     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2693     *d += rstart;
2694   }
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2704   PetscFunctionReturn(PETSC_SUCCESS);
2705 }
2706 
2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2708 {
2709   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2710 
2711   PetscFunctionBegin;
2712   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2718                                        MatGetRow_MPIAIJ,
2719                                        MatRestoreRow_MPIAIJ,
2720                                        MatMult_MPIAIJ,
2721                                        /* 4*/ MatMultAdd_MPIAIJ,
2722                                        MatMultTranspose_MPIAIJ,
2723                                        MatMultTransposeAdd_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        /*10*/ NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        MatSOR_MPIAIJ,
2731                                        MatTranspose_MPIAIJ,
2732                                        /*15*/ MatGetInfo_MPIAIJ,
2733                                        MatEqual_MPIAIJ,
2734                                        MatGetDiagonal_MPIAIJ,
2735                                        MatDiagonalScale_MPIAIJ,
2736                                        MatNorm_MPIAIJ,
2737                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2738                                        MatAssemblyEnd_MPIAIJ,
2739                                        MatSetOption_MPIAIJ,
2740                                        MatZeroEntries_MPIAIJ,
2741                                        /*24*/ MatZeroRows_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        /*29*/ MatSetUp_MPI_Hash,
2747                                        NULL,
2748                                        NULL,
2749                                        MatGetDiagonalBlock_MPIAIJ,
2750                                        NULL,
2751                                        /*34*/ MatDuplicate_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        /*39*/ MatAXPY_MPIAIJ,
2757                                        MatCreateSubMatrices_MPIAIJ,
2758                                        MatIncreaseOverlap_MPIAIJ,
2759                                        MatGetValues_MPIAIJ,
2760                                        MatCopy_MPIAIJ,
2761                                        /*44*/ MatGetRowMax_MPIAIJ,
2762                                        MatScale_MPIAIJ,
2763                                        MatShift_MPIAIJ,
2764                                        MatDiagonalSet_MPIAIJ,
2765                                        MatZeroRowsColumns_MPIAIJ,
2766                                        /*49*/ MatSetRandom_MPIAIJ,
2767                                        MatGetRowIJ_MPIAIJ,
2768                                        MatRestoreRowIJ_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2772                                        NULL,
2773                                        MatSetUnfactored_MPIAIJ,
2774                                        MatPermute_MPIAIJ,
2775                                        NULL,
2776                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2777                                        MatDestroy_MPIAIJ,
2778                                        MatView_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        /*64*/ NULL,
2782                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2787                                        MatGetRowMinAbs_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*75*/ MatFDColoringApply_AIJ,
2793                                        MatSetFromOptions_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        MatFindZeroDiagonals_MPIAIJ,
2797                                        /*80*/ NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*83*/ MatLoad_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*89*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatBindToCPU_MPIAIJ,
2816                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2817                                        NULL,
2818                                        NULL,
2819                                        MatConjugate_MPIAIJ,
2820                                        NULL,
2821                                        /*104*/ MatSetValuesRow_MPIAIJ,
2822                                        MatRealPart_MPIAIJ,
2823                                        MatImaginaryPart_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        /*109*/ NULL,
2827                                        NULL,
2828                                        MatGetRowMin_MPIAIJ,
2829                                        NULL,
2830                                        MatMissingDiagonal_MPIAIJ,
2831                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2832                                        NULL,
2833                                        MatGetGhosts_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                        MatGetMultiProcBlock_MPIAIJ,
2841                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2842                                        MatGetColumnReductions_MPIAIJ,
2843                                        MatInvertBlockDiagonal_MPIAIJ,
2844                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2845                                        MatCreateSubMatricesMPI_MPIAIJ,
2846                                        /*129*/ NULL,
2847                                        NULL,
2848                                        NULL,
2849                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2850                                        NULL,
2851                                        /*134*/ NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2857                                        NULL,
2858                                        NULL,
2859                                        MatFDColoringSetUp_MPIXAIJ,
2860                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2861                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2862                                        /*145*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCreateGraph_Simple_AIJ,
2866                                        NULL,
2867                                        /*150*/ NULL,
2868                                        MatEliminateZeros_MPIAIJ,
2869                                        MatGetRowSumAbs_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                        NULL};
2873 
2874 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatStoreValues(aij->A));
2880   PetscCall(MatStoreValues(aij->B));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
2884 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2885 {
2886   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2887 
2888   PetscFunctionBegin;
2889   PetscCall(MatRetrieveValues(aij->A));
2890   PetscCall(MatRetrieveValues(aij->B));
2891   PetscFunctionReturn(PETSC_SUCCESS);
2892 }
2893 
2894 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2895 {
2896   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2897   PetscMPIInt size;
2898 
2899   PetscFunctionBegin;
2900   if (B->hash_active) {
2901     B->ops[0]      = b->cops;
2902     B->hash_active = PETSC_FALSE;
2903   }
2904   PetscCall(PetscLayoutSetUp(B->rmap));
2905   PetscCall(PetscLayoutSetUp(B->cmap));
2906 
2907 #if defined(PETSC_USE_CTABLE)
2908   PetscCall(PetscHMapIDestroy(&b->colmap));
2909 #else
2910   PetscCall(PetscFree(b->colmap));
2911 #endif
2912   PetscCall(PetscFree(b->garray));
2913   PetscCall(VecDestroy(&b->lvec));
2914   PetscCall(VecScatterDestroy(&b->Mvctx));
2915 
2916   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2917 
2918   MatSeqXAIJGetOptions_Private(b->B);
2919   PetscCall(MatDestroy(&b->B));
2920   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2921   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2922   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2923   PetscCall(MatSetType(b->B, MATSEQAIJ));
2924   MatSeqXAIJRestoreOptions_Private(b->B);
2925 
2926   MatSeqXAIJGetOptions_Private(b->A);
2927   PetscCall(MatDestroy(&b->A));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2929   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2930   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2931   PetscCall(MatSetType(b->A, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->A);
2933 
2934   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2935   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2936   B->preallocated  = PETSC_TRUE;
2937   B->was_assembled = PETSC_FALSE;
2938   B->assembled     = PETSC_FALSE;
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
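/*
   A minimal usage sketch (not part of this source) of the public entry point that lands here,
   MatMPIAIJSetPreallocation(). The uniform estimates below request roughly 5 nonzeros per row in the
   diagonal block (b->A) and 2 per row in the off-diagonal block (b->B); passing d_nnz/o_nnz arrays
   instead of NULL would give exact per-row counts. M and N are illustrative global sizes:

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/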
2941 
2942 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2943 {
2944   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2945 
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2948   PetscCall(PetscLayoutSetUp(B->rmap));
2949   PetscCall(PetscLayoutSetUp(B->cmap));
2950   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2951   else {
2952 #if defined(PETSC_USE_CTABLE)
2953     PetscCall(PetscHMapIDestroy(&b->colmap));
2954 #else
2955     PetscCall(PetscFree(b->colmap));
2956 #endif
2957     PetscCall(PetscFree(b->garray));
2958     PetscCall(VecDestroy(&b->lvec));
2959   }
2960   PetscCall(VecScatterDestroy(&b->Mvctx));
2961 
2962   PetscCall(MatResetPreallocation(b->A));
2963   PetscCall(MatResetPreallocation(b->B));
2964   B->preallocated  = PETSC_TRUE;
2965   B->was_assembled = PETSC_FALSE;
2966   B->assembled     = PETSC_FALSE;
2967   PetscFunctionReturn(PETSC_SUCCESS);
2968 }
2969 
2970 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2971 {
2972   Mat         mat;
2973   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2974 
2975   PetscFunctionBegin;
2976   *newmat = NULL;
2977   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2978   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2979   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2980   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2981   a = (Mat_MPIAIJ *)mat->data;
2982 
2983   mat->factortype = matin->factortype;
2984   mat->assembled  = matin->assembled;
2985   mat->insertmode = NOT_SET_VALUES;
2986 
2987   a->size         = oldmat->size;
2988   a->rank         = oldmat->rank;
2989   a->donotstash   = oldmat->donotstash;
2990   a->roworiented  = oldmat->roworiented;
2991   a->rowindices   = NULL;
2992   a->rowvalues    = NULL;
2993   a->getrowactive = PETSC_FALSE;
2994 
2995   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2996   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2997   if (matin->hash_active) {
2998     PetscCall(MatSetUp(mat));
2999   } else {
3000     mat->preallocated = matin->preallocated;
3001     if (oldmat->colmap) {
3002 #if defined(PETSC_USE_CTABLE)
3003       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3004 #else
3005       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3006       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3007 #endif
3008     } else a->colmap = NULL;
3009     if (oldmat->garray) {
3010       PetscInt len;
3011       len = oldmat->B->cmap->n;
3012       PetscCall(PetscMalloc1(len + 1, &a->garray));
3013       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3014     } else a->garray = NULL;
3015 
3016     /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3017       in fact, MatDuplicate() only requires the matrix to be preallocated.
3018       This may happen inside DMCreateMatrix_Shell() */
3019     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3020     if (oldmat->Mvctx) {
3021       a->Mvctx = oldmat->Mvctx;
3022       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3023     }
3024     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3025     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3026   }
3027   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3028   *newmat = mat;
3029   PetscFunctionReturn(PETSC_SUCCESS);
3030 }
3031 
3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3033 {
3034   PetscBool isbinary, ishdf5;
3035 
3036   PetscFunctionBegin;
3037   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3038   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3039   /* force binary viewer to load .info file if it has not yet done so */
3040   PetscCall(PetscViewerSetUp(viewer));
3041   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3043   if (isbinary) {
3044     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3045   } else if (ishdf5) {
3046 #if defined(PETSC_HAVE_HDF5)
3047     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3048 #else
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3050 #endif
3051   } else {
3052     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3053   }
3054   PetscFunctionReturn(PETSC_SUCCESS);
3055 }
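/*
   A minimal usage sketch (not part of this source) for loading a MATMPIAIJ matrix from a PETSc binary
   file; the file name "matrix.dat" is illustrative. MatLoad() ends up in MatLoad_MPIAIJ() above, which
   dispatches to the binary or HDF5 reader depending on the viewer type:

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/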
3056 
3057 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3058 {
3059   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3060   PetscInt    *rowidxs, *colidxs;
3061   PetscScalar *matvals;
3062 
3063   PetscFunctionBegin;
3064   PetscCall(PetscViewerSetUp(viewer));
3065 
3066   /* read in matrix header */
3067   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3068   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3069   M  = header[1];
3070   N  = header[2];
3071   nz = header[3];
3072   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3073   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3074   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3075 
3076   /* set block sizes from the viewer's .info file */
3077   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3078   /* set global sizes if not set already */
3079   if (mat->rmap->N < 0) mat->rmap->N = M;
3080   if (mat->cmap->N < 0) mat->cmap->N = N;
3081   PetscCall(PetscLayoutSetUp(mat->rmap));
3082   PetscCall(PetscLayoutSetUp(mat->cmap));
3083 
3084   /* check if the matrix sizes are correct */
3085   PetscCall(MatGetSize(mat, &rows, &cols));
3086   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3087 
3088   /* read in row lengths and build row indices */
3089   PetscCall(MatGetLocalSize(mat, &m, NULL));
3090   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3091   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3092   rowidxs[0] = 0;
3093   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3094   if (nz != PETSC_INT_MAX) {
3095     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3096     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3097   }
3098 
3099   /* read in column indices and matrix values */
3100   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3101   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3102   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3103   /* store matrix indices and values */
3104   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3105   PetscCall(PetscFree(rowidxs));
3106   PetscCall(PetscFree2(colidxs, matvals));
3107   PetscFunctionReturn(PETSC_SUCCESS);
3108 }
3109 
3110 /* Not scalable because of ISAllGather() unless getting all columns. */
3111 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3112 {
3113   IS          iscol_local;
3114   PetscBool   isstride;
3115   PetscMPIInt lisstride = 0, gisstride;
3116 
3117   PetscFunctionBegin;
3118   /* check if we are grabbing all columns */
3119   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3120 
3121   if (isstride) {
3122     PetscInt start, len, mstart, mlen;
3123     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3124     PetscCall(ISGetLocalSize(iscol, &len));
3125     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3126     if (mstart == start && mlen - mstart == len) lisstride = 1;
3127   }
3128 
3129   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3130   if (gisstride) {
3131     PetscInt N;
3132     PetscCall(MatGetSize(mat, NULL, &N));
3133     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3134     PetscCall(ISSetIdentity(iscol_local));
3135     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3136   } else {
3137     PetscInt cbs;
3138     PetscCall(ISGetBlockSize(iscol, &cbs));
3139     PetscCall(ISAllGather(iscol, &iscol_local));
3140     PetscCall(ISSetBlockSize(iscol_local, cbs));
3141   }
3142 
3143   *isseq = iscol_local;
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
3147 /*
3148  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3149  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3150 
3151  Input Parameters:
3152 +   mat - matrix
3153 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3154            i.e., mat->rstart <= isrow[i] < mat->rend
3155 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3156            i.e., mat->cstart <= iscol[i] < mat->cend
3157 
3158  Output Parameters:
3159 +   isrow_d - sequential row index set for retrieving mat->A
3160 .   iscol_d - sequential column index set for retrieving mat->A
3161 .   iscol_o - sequential column index set for retrieving mat->B
3162 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3163  */
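/*
   A worked illustration (not part of this source), assuming 2 ranks and a 4x4 mat where rank 0 owns
   rows/columns 0-1 and rank 1 owns rows/columns 2-3, and assuming a->garray = {2,3} on rank 0
   (i.e. mat->B on rank 0 has entries only in global columns 2 and 3):

     iscol (parallel): rank 0 selects global columns {0,1}, rank 1 selects {2}

     then on rank 0: iscol_d = {0,1}  local columns of mat->A (global columns 0 and 1)
                     iscol_o = {0}    local column of mat->B whose global index (2) was selected
                     garray  = {2}    position of that column in the concatenated iscol {0,1,2}
*/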
3164 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3165 {
3166   Vec             x, cmap;
3167   const PetscInt *is_idx;
3168   PetscScalar    *xarray, *cmaparray;
3169   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3170   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3171   Mat             B    = a->B;
3172   Vec             lvec = a->lvec, lcmap;
3173   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3174   MPI_Comm        comm;
3175   VecScatter      Mvctx = a->Mvctx;
3176 
3177   PetscFunctionBegin;
3178   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3179   PetscCall(ISGetLocalSize(iscol, &ncols));
3180 
3181   /* (1) iscol selects a subset of the columns of mat; mark the selected columns in a full-length vector x that is initialized to -1 */
3182   PetscCall(MatCreateVecs(mat, &x, NULL));
3183   PetscCall(VecSet(x, -1.0));
3184   PetscCall(VecDuplicate(x, &cmap));
3185   PetscCall(VecSet(cmap, -1.0));
3186 
3187   /* Get start indices */
3188   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3189   isstart -= ncols;
3190   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3191 
3192   PetscCall(ISGetIndices(iscol, &is_idx));
3193   PetscCall(VecGetArray(x, &xarray));
3194   PetscCall(VecGetArray(cmap, &cmaparray));
3195   PetscCall(PetscMalloc1(ncols, &idx));
3196   for (i = 0; i < ncols; i++) {
3197     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3198     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3199     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3200   }
3201   PetscCall(VecRestoreArray(x, &xarray));
3202   PetscCall(VecRestoreArray(cmap, &cmaparray));
3203   PetscCall(ISRestoreIndices(iscol, &is_idx));
3204 
3205   /* Get iscol_d */
3206   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3207   PetscCall(ISGetBlockSize(iscol, &i));
3208   PetscCall(ISSetBlockSize(*iscol_d, i));
3209 
3210   /* Get isrow_d */
3211   PetscCall(ISGetLocalSize(isrow, &m));
3212   rstart = mat->rmap->rstart;
3213   PetscCall(PetscMalloc1(m, &idx));
3214   PetscCall(ISGetIndices(isrow, &is_idx));
3215   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3216   PetscCall(ISRestoreIndices(isrow, &is_idx));
3217 
3218   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3219   PetscCall(ISGetBlockSize(isrow, &i));
3220   PetscCall(ISSetBlockSize(*isrow_d, i));
3221 
3222   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3223   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3224   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3225 
3226   PetscCall(VecDuplicate(lvec, &lcmap));
3227 
3228   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3229   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3230 
3231   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3232   /* off-process column indices */
3233   count = 0;
3234   PetscCall(PetscMalloc1(Bn, &idx));
3235   PetscCall(PetscMalloc1(Bn, &cmap1));
3236 
3237   PetscCall(VecGetArray(lvec, &xarray));
3238   PetscCall(VecGetArray(lcmap, &cmaparray));
3239   for (i = 0; i < Bn; i++) {
3240     if (PetscRealPart(xarray[i]) > -1.0) {
3241       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3242       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3243       count++;
3244     }
3245   }
3246   PetscCall(VecRestoreArray(lvec, &xarray));
3247   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3248 
3249   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3250   /* cannot ensure iscol_o has same blocksize as iscol! */
3251 
3252   PetscCall(PetscFree(idx));
3253   *garray = cmap1;
3254 
3255   PetscCall(VecDestroy(&x));
3256   PetscCall(VecDestroy(&cmap));
3257   PetscCall(VecDestroy(&lcmap));
3258   PetscFunctionReturn(PETSC_SUCCESS);
3259 }
3260 
3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3263 {
3264   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3265   Mat         M = NULL;
3266   MPI_Comm    comm;
3267   IS          iscol_d, isrow_d, iscol_o;
3268   Mat         Asub = NULL, Bsub = NULL;
3269   PetscInt    n;
3270 
3271   PetscFunctionBegin;
3272   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3273 
3274   if (call == MAT_REUSE_MATRIX) {
3275     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3276     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3277     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3278 
3279     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3280     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3281 
3282     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3283     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3284 
3285     /* Update diagonal and off-diagonal portions of submat */
3286     asub = (Mat_MPIAIJ *)(*submat)->data;
3287     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3288     PetscCall(ISGetLocalSize(iscol_o, &n));
3289     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3290     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3291     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3292 
3293   } else { /* call == MAT_INITIAL_MATRIX */
3294     const PetscInt *garray;
3295     PetscInt        BsubN;
3296 
3297     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3298     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3299 
3300     /* Create local submatrices Asub and Bsub */
3301     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3302     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3303 
3304     /* Create submatrix M */
3305     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3306 
3307     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3308     asub = (Mat_MPIAIJ *)M->data;
3309 
3310     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3311     n = asub->B->cmap->N;
3312     if (BsubN > n) {
3313       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3314       const PetscInt *idx;
3315       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3316       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3317 
3318       PetscCall(PetscMalloc1(n, &idx_new));
3319       j = 0;
3320       PetscCall(ISGetIndices(iscol_o, &idx));
3321       for (i = 0; i < n; i++) {
3322         if (j >= BsubN) break;
3323         while (subgarray[i] > garray[j]) j++;
3324 
3325         if (subgarray[i] == garray[j]) {
3326           idx_new[i] = idx[j++];
3327         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3328       }
3329       PetscCall(ISRestoreIndices(iscol_o, &idx));
3330 
3331       PetscCall(ISDestroy(&iscol_o));
3332       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3333 
3334     } else if (BsubN < n) {
3335       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3336     }
3337 
3338     PetscCall(PetscFree(garray));
3339     *submat = M;
3340 
3341     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3342     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3343     PetscCall(ISDestroy(&isrow_d));
3344 
3345     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3346     PetscCall(ISDestroy(&iscol_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3349     PetscCall(ISDestroy(&iscol_o));
3350   }
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3355 {
3356   IS        iscol_local = NULL, isrow_d;
3357   PetscInt  csize;
3358   PetscInt  n, i, j, start, end;
3359   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3360   MPI_Comm  comm;
3361 
3362   PetscFunctionBegin;
3363   /* If isrow has same processor distribution as mat,
3364      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3365   if (call == MAT_REUSE_MATRIX) {
3366     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3367     if (isrow_d) {
3368       sameRowDist  = PETSC_TRUE;
3369       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3370     } else {
3371       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3372       if (iscol_local) {
3373         sameRowDist  = PETSC_TRUE;
3374         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3375       }
3376     }
3377   } else {
3378     /* Check if isrow has same processor distribution as mat */
3379     sameDist[0] = PETSC_FALSE;
3380     PetscCall(ISGetLocalSize(isrow, &n));
3381     if (!n) {
3382       sameDist[0] = PETSC_TRUE;
3383     } else {
3384       PetscCall(ISGetMinMax(isrow, &i, &j));
3385       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3386       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3387     }
3388 
3389     /* Check if iscol has same processor distribution as mat */
3390     sameDist[1] = PETSC_FALSE;
3391     PetscCall(ISGetLocalSize(iscol, &n));
3392     if (!n) {
3393       sameDist[1] = PETSC_TRUE;
3394     } else {
3395       PetscCall(ISGetMinMax(iscol, &i, &j));
3396       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3397       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3398     }
3399 
3400     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3401     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3402     sameRowDist = tsameDist[0];
3403   }
3404 
3405   if (sameRowDist) {
3406     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3407       /* isrow and iscol have same processor distribution as mat */
3408       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3409       PetscFunctionReturn(PETSC_SUCCESS);
3410     } else { /* sameRowDist */
3411       /* isrow has same processor distribution as mat */
3412       if (call == MAT_INITIAL_MATRIX) {
3413         PetscBool sorted;
3414         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3415         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3416         PetscCall(ISGetSize(iscol, &i));
3417         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3418 
3419         PetscCall(ISSorted(iscol_local, &sorted));
3420         if (sorted) {
3421           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3422           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3423           PetscFunctionReturn(PETSC_SUCCESS);
3424         }
3425       } else { /* call == MAT_REUSE_MATRIX */
3426         IS iscol_sub;
3427         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3428         if (iscol_sub) {
3429           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3430           PetscFunctionReturn(PETSC_SUCCESS);
3431         }
3432       }
3433     }
3434   }
3435 
3436   /* General case: iscol -> iscol_local which has global size of iscol */
3437   if (call == MAT_REUSE_MATRIX) {
3438     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3439     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3440   } else {
3441     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3442   }
3443 
3444   PetscCall(ISGetLocalSize(iscol, &csize));
3445   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3446 
3447   if (call == MAT_INITIAL_MATRIX) {
3448     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3449     PetscCall(ISDestroy(&iscol_local));
3450   }
3451   PetscFunctionReturn(PETSC_SUCCESS);
3452 }
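/*
   A minimal usage sketch (not part of this source): extracting a submatrix whose row and column index
   sets have the same parallel layout as mat, which takes the MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   path above. Here the index sets simply select every locally owned row and column:

     IS       isrow, iscol;
     Mat      sub;
     PetscInt rstart, rend, cstart, cend;
     PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub));
     PetscCall(MatDestroy(&sub));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));

   The second call reuses the stored isrow_d/iscol_d/iscol_o index sets composed on the submatrix by the
   initial call.
*/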
3453 
3454 /*@C
3455   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3456   and "off-diagonal" part of the matrix in CSR format.
3457 
3458   Collective
3459 
3460   Input Parameters:
3461 + comm   - MPI communicator
3462 . A      - "diagonal" portion of matrix
3463 . B      - "off-diagonal" portion of matrix; it may have empty columns and will be destroyed by this routine
3464 - garray - global index of `B` columns
3465 
3466   Output Parameter:
3467 . mat - the matrix, with input `A` as its local diagonal matrix
3468 
3469   Level: advanced
3470 
3471   Notes:
3472   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3473 
3474   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3475 
3476 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3477 @*/
3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3479 {
3480   Mat_MPIAIJ        *maij;
3481   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3482   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3483   const PetscScalar *oa;
3484   Mat                Bnew;
3485   PetscInt           m, n, N;
3486   MatType            mpi_mat_type;
3487 
3488   PetscFunctionBegin;
3489   PetscCall(MatCreate(comm, mat));
3490   PetscCall(MatGetSize(A, &m, &n));
3491   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3492   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3493   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3494   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3495 
3496   /* Get global columns of mat */
3497   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3498 
3499   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3500   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3501   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3502   PetscCall(MatSetType(*mat, mpi_mat_type));
3503 
3504   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3505   maij = (Mat_MPIAIJ *)(*mat)->data;
3506 
3507   (*mat)->preallocated = PETSC_TRUE;
3508 
3509   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3510   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3511 
3512   /* Set A as diagonal portion of *mat */
3513   maij->A = A;
3514 
3515   nz = oi[m];
3516   for (i = 0; i < nz; i++) {
3517     col   = oj[i];
3518     oj[i] = garray[col];
3519   }
3520 
3521   /* Set Bnew as off-diagonal portion of *mat */
3522   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3523   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3524   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3525   bnew        = (Mat_SeqAIJ *)Bnew->data;
3526   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3527   maij->B     = Bnew;
3528 
3529   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3530 
3531   b->free_a  = PETSC_FALSE;
3532   b->free_ij = PETSC_FALSE;
3533   PetscCall(MatDestroy(&B));
3534 
3535   bnew->free_a  = PETSC_TRUE;
3536   bnew->free_ij = PETSC_TRUE;
3537 
3538   /* condense columns of maij->B */
3539   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3540   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3541   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3542   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3543   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3544   PetscFunctionReturn(PETSC_SUCCESS);
3545 }
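/*
   A worked illustration (not part of this source) of the A/B/garray input convention, for a 4x4 global
   matrix on 2 ranks, each owning 2 rows and 2 columns (a dot marks a zero that is not stored):

       global matrix     rank 0 (rows 0-1, cols 0-1)        rank 1 (rows 2-3, cols 2-3)
       [ 1 2 . 3 ]       A = [ 1 2 ]    B = [ . 3 ]         A = [ 7 . ]    B = [ 6 . ]
       [ . 4 5 . ]           [ . 4 ]        [ 5 . ]             [ . 9 ]        [ . 8 ]
       [ 6 . 7 . ]       garray = { 2, 3 }                  garray = { 0, 1 }
       [ . 8 . 9 ]

   Each B is a sequential AIJ matrix in compressed column numbering; garray maps its local columns to
   global column indices, and this routine renumbers B's column indices through garray before assembling
   the MPIAIJ result.
*/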
3546 
3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3548 
3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3550 {
3551   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3552   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3553   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3554   Mat             M, Msub, B = a->B;
3555   MatScalar      *aa;
3556   Mat_SeqAIJ     *aij;
3557   PetscInt       *garray = a->garray, *colsub, Ncols;
3558   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3559   IS              iscol_sub, iscmap;
3560   const PetscInt *is_idx, *cmap;
3561   PetscBool       allcolumns = PETSC_FALSE;
3562   MPI_Comm        comm;
3563 
3564   PetscFunctionBegin;
3565   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3566   if (call == MAT_REUSE_MATRIX) {
3567     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3568     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3569     PetscCall(ISGetLocalSize(iscol_sub, &count));
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3572     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3573 
3574     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3575     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3576 
3577     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3578 
3579   } else { /* call == MAT_INITIAL_MATRIX */
3580     PetscBool flg;
3581 
3582     PetscCall(ISGetLocalSize(iscol, &n));
3583     PetscCall(ISGetSize(iscol, &Ncols));
3584 
3585     /* (1) iscol -> nonscalable iscol_local */
3586     /* Check for special case: each processor gets entire matrix columns */
3587     PetscCall(ISIdentity(iscol_local, &flg));
3588     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3589     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3590     if (allcolumns) {
3591       iscol_sub = iscol_local;
3592       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3593       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3594 
3595     } else {
3596       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3597       PetscInt *idx, *cmap1, k;
3598       PetscCall(PetscMalloc1(Ncols, &idx));
3599       PetscCall(PetscMalloc1(Ncols, &cmap1));
3600       PetscCall(ISGetIndices(iscol_local, &is_idx));
3601       count = 0;
3602       k     = 0;
3603       for (i = 0; i < Ncols; i++) {
3604         j = is_idx[i];
3605         if (j >= cstart && j < cend) {
3606           /* diagonal part of mat */
3607           idx[count]     = j;
3608           cmap1[count++] = i; /* column index in submat */
3609         } else if (Bn) {
3610           /* off-diagonal part of mat */
3611           if (j == garray[k]) {
3612             idx[count]     = j;
3613             cmap1[count++] = i; /* column index in submat */
3614           } else if (j > garray[k]) {
3615             while (j > garray[k] && k < Bn - 1) k++;
3616             if (j == garray[k]) {
3617               idx[count]     = j;
3618               cmap1[count++] = i; /* column index in submat */
3619             }
3620           }
3621         }
3622       }
3623       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3624 
3625       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3626       PetscCall(ISGetBlockSize(iscol, &cbs));
3627       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3628 
3629       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3630     }
3631 
3632     /* (3) Create sequential Msub */
3633     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3634   }
3635 
3636   PetscCall(ISGetLocalSize(iscol_sub, &count));
3637   aij = (Mat_SeqAIJ *)Msub->data;
3638   ii  = aij->i;
3639   PetscCall(ISGetIndices(iscmap, &cmap));
3640 
3641   /*
3642       m - number of local rows
3643       Ncols - number of columns (same on all processors)
3644       rstart - first row in new global matrix generated
3645   */
3646   PetscCall(MatGetSize(Msub, &m, NULL));
3647 
3648   if (call == MAT_INITIAL_MATRIX) {
3649     /* (4) Create parallel newmat */
3650     PetscMPIInt rank, size;
3651     PetscInt    csize;
3652 
3653     PetscCallMPI(MPI_Comm_size(comm, &size));
3654     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3655 
3656     /*
3657         Determine the number of non-zeros in the diagonal and off-diagonal
3658         portions of the matrix in order to do correct preallocation
3659     */
3660 
3661     /* first get start and end of "diagonal" columns */
3662     PetscCall(ISGetLocalSize(iscol, &csize));
3663     if (csize == PETSC_DECIDE) {
3664       PetscCall(ISGetSize(isrow, &mglobal));
3665       if (mglobal == Ncols) { /* square matrix */
3666         nlocal = m;
3667       } else {
3668         nlocal = Ncols / size + ((Ncols % size) > rank);
3669       }
3670     } else {
3671       nlocal = csize;
3672     }
3673     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3674     rstart = rend - nlocal;
3675     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3676 
3677     /* next, compute all the lengths */
3678     jj = aij->j;
3679     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3680     olens = dlens + m;
3681     for (i = 0; i < m; i++) {
3682       jend = ii[i + 1] - ii[i];
3683       olen = 0;
3684       dlen = 0;
3685       for (j = 0; j < jend; j++) {
3686         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3687         else dlen++;
3688         jj++;
3689       }
3690       olens[i] = olen;
3691       dlens[i] = dlen;
3692     }
3693 
3694     PetscCall(ISGetBlockSize(isrow, &bs));
3695     PetscCall(ISGetBlockSize(iscol, &cbs));
3696 
3697     PetscCall(MatCreate(comm, &M));
3698     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3699     PetscCall(MatSetBlockSizes(M, bs, cbs));
3700     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3701     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3702     PetscCall(PetscFree(dlens));
3703 
3704   } else { /* call == MAT_REUSE_MATRIX */
3705     M = *newmat;
3706     PetscCall(MatGetLocalSize(M, &i, NULL));
3707     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3708     PetscCall(MatZeroEntries(M));
3709     /*
3710          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3711        rather than the slower MatSetValues().
3712     */
3713     M->was_assembled = PETSC_TRUE;
3714     M->assembled     = PETSC_FALSE;
3715   }
3716 
3717   /* (5) Set values of Msub to *newmat */
3718   PetscCall(PetscMalloc1(count, &colsub));
3719   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3720 
3721   jj = aij->j;
3722   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3723   for (i = 0; i < m; i++) {
3724     row = rstart + i;
3725     nz  = ii[i + 1] - ii[i];
3726     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3727     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3728     jj += nz;
3729     aa += nz;
3730   }
3731   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3732   PetscCall(ISRestoreIndices(iscmap, &cmap));
3733 
3734   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3735   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3736 
3737   PetscCall(PetscFree(colsub));
3738 
3739   /* save Msub, iscol_sub and iscmap used in processor for next request */
3740   if (call == MAT_INITIAL_MATRIX) {
3741     *newmat = M;
3742     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3743     PetscCall(MatDestroy(&Msub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3746     PetscCall(ISDestroy(&iscol_sub));
3747 
3748     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3749     PetscCall(ISDestroy(&iscmap));
3750 
3751     if (iscol_local) {
3752       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3753       PetscCall(ISDestroy(&iscol_local));
3754     }
3755   }
3756   PetscFunctionReturn(PETSC_SUCCESS);
3757 }
3758 
3759 /*
3760     Not great since it makes two copies of the submatrix: first a SeqAIJ
3761   on each process, and then the end result by concatenating the local matrices.
3762   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3763 
3764   This requires a sequential iscol with all indices.
3765 */
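/*
   A minimal sketch (not part of this source) of how the required sequential iscol with all indices is
   formed from a parallel column index set; this is what MatCreateSubMatrix_MPIAIJ() does (through
   ISGetSeqIS_Private()) before calling this routine. ISAllGather() concatenates every rank's indices
   onto every rank:

     IS iscol_local;
     PetscCall(ISAllGather(iscol, &iscol_local));

   This is exactly the non-scalable step: iscol_local has the global size of iscol on each process.
*/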
3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3767 {
3768   PetscMPIInt rank, size;
3769   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3770   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3771   Mat         M, Mreuse;
3772   MatScalar  *aa, *vwork;
3773   MPI_Comm    comm;
3774   Mat_SeqAIJ *aij;
3775   PetscBool   colflag, allcolumns = PETSC_FALSE;
3776 
3777   PetscFunctionBegin;
3778   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3779   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3780   PetscCallMPI(MPI_Comm_size(comm, &size));
3781 
3782   /* Check for special case: each processor gets entire matrix columns */
3783   PetscCall(ISIdentity(iscol, &colflag));
3784   PetscCall(ISGetLocalSize(iscol, &n));
3785   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3786   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3787 
3788   if (call == MAT_REUSE_MATRIX) {
3789     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3790     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3791     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3792   } else {
3793     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3794   }
3795 
3796   /*
3797       m - number of local rows
3798       n - number of columns (same on all processors)
3799       rstart - first row in new global matrix generated
3800   */
3801   PetscCall(MatGetSize(Mreuse, &m, &n));
3802   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3803   if (call == MAT_INITIAL_MATRIX) {
3804     aij = (Mat_SeqAIJ *)Mreuse->data;
3805     ii  = aij->i;
3806     jj  = aij->j;
3807 
3808     /*
3809         Determine the number of non-zeros in the diagonal and off-diagonal
3810         portions of the matrix in order to do correct preallocation
3811     */
3812 
3813     /* first get start and end of "diagonal" columns */
3814     if (csize == PETSC_DECIDE) {
3815       PetscCall(ISGetSize(isrow, &mglobal));
3816       if (mglobal == n) { /* square matrix */
3817         nlocal = m;
3818       } else {
3819         nlocal = n / size + ((n % size) > rank);
3820       }
3821     } else {
3822       nlocal = csize;
3823     }
3824     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3825     rstart = rend - nlocal;
3826     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3827 
3828     /* next, compute all the lengths */
3829     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3830     olens = dlens + m;
3831     for (i = 0; i < m; i++) {
3832       jend = ii[i + 1] - ii[i];
3833       olen = 0;
3834       dlen = 0;
3835       for (j = 0; j < jend; j++) {
3836         if (*jj < rstart || *jj >= rend) olen++;
3837         else dlen++;
3838         jj++;
3839       }
3840       olens[i] = olen;
3841       dlens[i] = dlen;
3842     }
3843     PetscCall(MatCreate(comm, &M));
3844     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3845     PetscCall(MatSetBlockSizes(M, bs, cbs));
3846     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3847     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3848     PetscCall(PetscFree(dlens));
3849   } else {
3850     PetscInt ml, nl;
3851 
3852     M = *newmat;
3853     PetscCall(MatGetLocalSize(M, &ml, &nl));
3854     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3855     PetscCall(MatZeroEntries(M));
3856     /*
3857          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3858        rather than the slower MatSetValues().
3859     */
3860     M->was_assembled = PETSC_TRUE;
3861     M->assembled     = PETSC_FALSE;
3862   }
3863   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3864   aij = (Mat_SeqAIJ *)Mreuse->data;
3865   ii  = aij->i;
3866   jj  = aij->j;
3867 
3868   /* trigger copy to CPU if needed */
3869   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3870   for (i = 0; i < m; i++) {
3871     row   = rstart + i;
3872     nz    = ii[i + 1] - ii[i];
3873     cwork = jj;
3874     jj    = PetscSafePointerPlusOffset(jj, nz);
3875     vwork = aa;
3876     aa    = PetscSafePointerPlusOffset(aa, nz);
3877     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3878   }
3879   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3880 
3881   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3882   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3883   *newmat = M;
3884 
3885   /* save submatrix used in processor for next request */
3886   if (call == MAT_INITIAL_MATRIX) {
3887     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3888     PetscCall(MatDestroy(&Mreuse));
3889   }
3890   PetscFunctionReturn(PETSC_SUCCESS);
3891 }
3892 
3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3894 {
3895   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3896   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3897   const PetscInt *JJ;
3898   PetscBool       nooffprocentries;
3899   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3900 
3901   PetscFunctionBegin;
3902   PetscCall(PetscLayoutSetUp(B->rmap));
3903   PetscCall(PetscLayoutSetUp(B->cmap));
3904   m       = B->rmap->n;
3905   cstart  = B->cmap->rstart;
3906   cend    = B->cmap->rend;
3907   rstart  = B->rmap->rstart;
3908   irstart = Ii[0];
3909 
3910   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3911 
3912   if (PetscDefined(USE_DEBUG)) {
3913     for (i = 0; i < m; i++) {
3914       nnz = Ii[i + 1] - Ii[i];
3915       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3916       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3917       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3918       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3919     }
3920   }
3921 
3922   for (i = 0; i < m; i++) {
3923     nnz     = Ii[i + 1] - Ii[i];
3924     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3925     nnz_max = PetscMax(nnz_max, nnz);
3926     d       = 0;
3927     for (j = 0; j < nnz; j++) {
3928       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3929     }
3930     d_nnz[i] = d;
3931     o_nnz[i] = nnz - d;
3932   }
3933   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3934   PetscCall(PetscFree2(d_nnz, o_nnz));
3935 
3936   for (i = 0; i < m; i++) {
3937     ii = i + rstart;
3938     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3939   }
3940   nooffprocentries    = B->nooffprocentries;
3941   B->nooffprocentries = PETSC_TRUE;
3942   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3943   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3944   B->nooffprocentries = nooffprocentries;
3945 
3946   /* count number of entries below block diagonal */
3947   PetscCall(PetscFree(Aij->ld));
3948   PetscCall(PetscCalloc1(m, &ld));
3949   Aij->ld = ld;
3950   for (i = 0; i < m; i++) {
3951     nnz = Ii[i + 1] - Ii[i];
3952     j   = 0;
3953     while (j < nnz && J[j] < cstart) j++;
3954     ld[i] = j;
3955     if (J) J += nnz;
3956   }
3957 
3958   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3959   PetscFunctionReturn(PETSC_SUCCESS);
3960 }
3961 
3962 /*@
3963   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3964   (the default parallel PETSc format).
3965 
3966   Collective
3967 
3968   Input Parameters:
3969 + B - the matrix
3970 . i - the indices into `j` for the start of each local row (indices start with zero)
3971 . j - the column indices for each local row (indices start with zero)
3972 - v - optional values in the matrix
3973 
3974   Level: developer
3975 
3976   Notes:
3977   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3978   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3979   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3980 
3981   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
3982 
3983   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3984 
3985   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3986 
3987   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3988   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3989 
3990   The format used for the sparse matrix input is equivalent to a
3991   row-major ordering, i.e., for the following matrix the expected input data is
3992   as shown
3993 .vb
3994         1 0 0
3995         2 0 3     P0
3996        -------
3997         4 5 6     P1
3998 
3999      Process0 [P0] rows_owned=[0,1]
4000         i =  {0,1,3}  [size = nrow+1  = 2+1]
4001         j =  {0,0,2}  [size = 3]
4002         v =  {1,2,3}  [size = 3]
4003 
4004      Process1 [P1] rows_owned=[2]
4005         i =  {0,3}    [size = nrow+1  = 1+1]
4006         j =  {0,1,2}  [size = 3]
4007         v =  {4,5,6}  [size = 3]
4008 .ve
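
  A minimal sketch of assembling this 3x3 example, assuming the communicator has exactly two MPI ranks (the variable names are only illustrative), is
.vb
  Mat         B;
  PetscMPIInt rank;
  /* CSR data from the layout above; rank 0 owns rows 0-1, rank 1 owns row 2 */
  PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};
  PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2};
  PetscScalar v0[] = {1, 2, 3}, v1[] = {4, 5, 6};

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, rank ? 1 : 2, PETSC_DECIDE, 3, 3));
  PetscCall(MatSetType(B, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(B, rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0));
.ve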
4009 
4010 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4011           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4012 @*/
4013 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4014 {
4015   PetscFunctionBegin;
4016   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4017   PetscFunctionReturn(PETSC_SUCCESS);
4018 }
4019 
4020 /*@
4021   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4022   (the default parallel PETSc format).  For good matrix assembly performance
4023   the user should preallocate the matrix storage by setting the parameters
4024   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4025 
4026   Collective
4027 
4028   Input Parameters:
4029 + B     - the matrix
4030 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4031            (same value is used for all local rows)
4032 . d_nnz - array containing the number of nonzeros in the various rows of the
4033            DIAGONAL portion of the local submatrix (possibly different for each row)
4034            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4035            The size of this array is equal to the number of local rows, i.e 'm'.
4036            For matrices that will be factored, you must leave room for (and set)
4037            the diagonal entry even if it is zero.
4038 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4039            submatrix (same value is used for all local rows).
4040 - o_nnz - array containing the number of nonzeros in the various rows of the
4041            OFF-DIAGONAL portion of the local submatrix (possibly different for
4042            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4043            structure. The size of this array is equal to the number
4044            of local rows, i.e 'm'.
4045 
4046   Example Usage:
4047   Consider the following 8x8 matrix with 34 non-zero values that is
4048   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4049   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4050   as follows
4051 
4052 .vb
4053             1  2  0  |  0  3  0  |  0  4
4054     Proc0   0  5  6  |  7  0  0  |  8  0
4055             9  0 10  | 11  0  0  | 12  0
4056     -------------------------------------
4057            13  0 14  | 15 16 17  |  0  0
4058     Proc1   0 18  0  | 19 20 21  |  0  0
4059             0  0  0  | 22 23  0  | 24  0
4060     -------------------------------------
4061     Proc2  25 26 27  |  0  0 28  | 29  0
4062            30  0  0  | 31 32 33  |  0 34
4063 .ve
4064 
4065   This can be represented as a collection of submatrices as
4066 .vb
4067       A B C
4068       D E F
4069       G H I
4070 .ve
4071 
4072   Where the submatrices A,B,C are owned by proc0, D,E,F are
4073   owned by proc1, G,H,I are owned by proc2.
4074 
4075   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4076   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4077   The 'M','N' parameters are 8,8, and have the same values on all procs.
4078 
4079   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4080   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4081   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4082   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4083   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4084   matrix, and [DF] as another `MATSEQAIJ` matrix.
4085 
4086   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4087   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4088   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4089   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4090   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4091   In this case, the values of `d_nz`, `o_nz` are
4092 .vb
4093      proc0  dnz = 2, o_nz = 2
4094      proc1  dnz = 3, o_nz = 2
4095      proc2  dnz = 1, o_nz = 4
4096 .ve
4097   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4098   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4099   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4100   34 values.
4101 
4102   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4103   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4104   In the above case the values for `d_nnz`, `o_nnz` are
4105 .vb
4106      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4107      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4108      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4109 .ve
4110   Here the space allocated is the sum of all the above values, i.e., 34, and
4111   hence the preallocation is perfect.
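
  As a rough sketch (assuming exactly three MPI ranks and the per-row counts from the tables above), the corresponding calls might be
.vb
  Mat             A;
  PetscMPIInt     rank;
  PetscInt        m;
  const PetscInt *dnnz, *onnz;
  /* exact per-row counts from the tables above */
  PetscInt        d0[] = {2, 2, 2}, o0[] = {2, 2, 2}; /* proc0 */
  PetscInt        d1[] = {3, 3, 2}, o1[] = {2, 1, 1}; /* proc1 */
  PetscInt        d2[] = {1, 1},    o2[] = {4, 4};    /* proc2 */

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  m    = (rank == 2) ? 2 : 3;
  dnnz = (rank == 0) ? d0 : (rank == 1) ? d1 : d2;
  onnz = (rank == 0) ? o0 : (rank == 1) ? o1 : o2;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, m, m, 8, 8));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(A, 0, dnnz, 0, onnz));
.ve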
4112 
4113   Level: intermediate
4114 
4115   Notes:
4116   If the *_nnz parameter is given then the *_nz parameter is ignored.
4117 
4118   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4119   storage.  The stored row and column indices begin with zero.
4120   See [Sparse Matrices](sec_matsparse) for details.
4121 
4122   The parallel matrix is partitioned such that the first m0 rows belong to
4123   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4124   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4125 
4126   The DIAGONAL portion of the local submatrix of a processor can be defined
4127   as the submatrix which is obtained by extracting the part corresponding to
4128   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4129   first row that belongs to the processor, r2 is the last row belonging to
4130   this processor, and c1-c2 is the range of indices of the local part of a
4131   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4132   common case of a square matrix, the row and column ranges are the same and
4133   the DIAGONAL part is also square. The remaining portion of the local
4134   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4135 
4136   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4137 
4138   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4139   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4140   You can also run with the option `-info` and look for messages with the string
4141   malloc in them to see if additional memory allocation was needed.
4142 
4143 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4144           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4145 @*/
4146 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4147 {
4148   PetscFunctionBegin;
4149   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4150   PetscValidType(B, 1);
4151   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4152   PetscFunctionReturn(PETSC_SUCCESS);
4153 }
4154 
4155 /*@
4156   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4157   CSR format.
4158 
4159   Collective
4160 
4161   Input Parameters:
4162 + comm - MPI communicator
4163 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4164 . n    - This value should be the same as the local size used in creating the
4165          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4166          calculated if `N` is given). For square matrices `n` is almost always `m`.
4167 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4168 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4169 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4170 . j    - global column indices
4171 - a    - optional matrix values
4172 
4173   Output Parameter:
4174 . mat - the matrix
4175 
4176   Level: intermediate
4177 
4178   Notes:
4179   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4180   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4181   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4182 
4183   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
4184 
4185   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4186 
4187   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4188   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4189 
4190   The format which is used for the sparse matrix input, is equivalent to a
4191   row-major ordering, i.e., for the following matrix, the input data expected is
4192   as shown
4193 .vb
4194         1 0 0
4195         2 0 3     P0
4196        -------
4197         4 5 6     P1
4198 
4199      Process0 [P0] rows_owned=[0,1]
4200         i =  {0,1,3}  [size = nrow+1  = 2+1]
4201         j =  {0,0,2}  [size = 3]
4202         v =  {1,2,3}  [size = 3]
4203 
4204      Process1 [P1] rows_owned=[2]
4205         i =  {0,3}    [size = nrow+1  = 1+1]
4206         j =  {0,1,2}  [size = 3]
4207         v =  {4,5,6}  [size = 3]
4208 .ve
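
  A compact sketch for this data, assuming a two-rank run and showing only rank 0 (rank 1 passes its own arrays analogously), is
.vb
  Mat         A;
  PetscInt    i[] = {0, 1, 3}, j[] = {0, 0, 2}; /* rank 0 owns rows 0 and 1 */
  PetscScalar v[] = {1, 2, 3};

  PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, 3, 3, i, j, v, &A));
.ve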
4209 
4210 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4211           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4212 @*/
4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4214 {
4215   PetscFunctionBegin;
4216   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4217   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4218   PetscCall(MatCreate(comm, mat));
4219   PetscCall(MatSetSizes(*mat, m, n, M, N));
4220   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4221   PetscCall(MatSetType(*mat, MATMPIAIJ));
4222   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4223   PetscFunctionReturn(PETSC_SUCCESS);
4224 }
4225 
4226 /*@
4227   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4228   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4229   to `MatCreateMPIAIJWithArrays()`
4230 
4231   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4232 
4233   Collective
4234 
4235   Input Parameters:
4236 + mat - the matrix
4237 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4238 . n   - This value should be the same as the local size used in creating the
4239        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4240        calculated if N is given). For square matrices n is almost always m.
4241 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4242 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4243 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4244 . J   - column indices
4245 - v   - matrix values
4246 
4247   Level: deprecated
4248 
4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4250           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4251 @*/
4252 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4253 {
4254   PetscInt        nnz, i;
4255   PetscBool       nooffprocentries;
4256   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4257   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4258   PetscScalar    *ad, *ao;
4259   PetscInt        ldi, Iii, md;
4260   const PetscInt *Adi = Ad->i;
4261   PetscInt       *ld  = Aij->ld;
4262 
4263   PetscFunctionBegin;
4264   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4265   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4266   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4267   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4268 
4269   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4270   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4271 
4272   for (i = 0; i < m; i++) {
4273     if (PetscDefined(USE_DEBUG)) {
4274       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4275         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4276         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4277       }
4278     }
4279     nnz = Ii[i + 1] - Ii[i];
4280     Iii = Ii[i];
4281     ldi = ld[i];
4282     md  = Adi[i + 1] - Adi[i];
4283     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4284     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4285     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4286     ad += md;
4287     ao += nnz - md;
4288   }
4289   nooffprocentries      = mat->nooffprocentries;
4290   mat->nooffprocentries = PETSC_TRUE;
4291   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4292   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4293   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4294   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4296   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4297   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4298   mat->nooffprocentries = nooffprocentries;
4299   PetscFunctionReturn(PETSC_SUCCESS);
4300 }
4301 
4302 /*@
4303   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4304 
4305   Collective
4306 
4307   Input Parameters:
4308 + mat - the matrix
4309 - v   - matrix values, stored by row
4310 
4311   Level: intermediate
4312 
4313   Notes:
4314   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4315 
4316   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
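
  A minimal sketch, assuming `A` was created by the rank-0 call in the `MatCreateMPIAIJWithArrays()` example (three nonzeros with sorted column indices on this rank), is
.vb
  PetscScalar vnew[] = {10, 20, 30}; /* same CSR layout as the original values, new numbers */

  PetscCall(MatUpdateMPIAIJWithArray(A, vnew));
.ve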
4317 
4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4319           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4320 @*/
4321 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4322 {
4323   PetscInt        nnz, i, m;
4324   PetscBool       nooffprocentries;
4325   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4326   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4327   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4328   PetscScalar    *ad, *ao;
4329   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4330   PetscInt        ldi, Iii, md;
4331   PetscInt       *ld = Aij->ld;
4332 
4333   PetscFunctionBegin;
4334   m = mat->rmap->n;
4335 
4336   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4337   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4338   Iii = 0;
4339   for (i = 0; i < m; i++) {
4340     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4341     ldi = ld[i];
4342     md  = Adi[i + 1] - Adi[i];
4343     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4344     ad += md;
4345     if (ao) {
4346       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4347       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4348       ao += nnz - md;
4349     }
4350     Iii += nnz;
4351   }
4352   nooffprocentries      = mat->nooffprocentries;
4353   mat->nooffprocentries = PETSC_TRUE;
4354   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4355   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4356   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4357   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4358   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4359   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4360   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4361   mat->nooffprocentries = nooffprocentries;
4362   PetscFunctionReturn(PETSC_SUCCESS);
4363 }
4364 
4365 /*@
4366   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4367   (the default parallel PETSc format).  For good matrix assembly performance
4368   the user should preallocate the matrix storage by setting the parameters
4369   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4370 
4371   Collective
4372 
4373   Input Parameters:
4374 + comm  - MPI communicator
4375 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given).
4376           This value should be the same as the local size used in creating the
4377           y vector for the matrix-vector product y = Ax.
4378 . n     - This value should be the same as the local size used in creating the
4379           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4380           calculated if `N` is given). For square matrices `n` is almost always `m`.
4381 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4382 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4383 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4384           (same value is used for all local rows)
4385 . d_nnz - array containing the number of nonzeros in the various rows of the
4386           DIAGONAL portion of the local submatrix (possibly different for each row)
4387           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4388           The size of this array is equal to the number of local rows, i.e 'm'.
4389 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4390           submatrix (same value is used for all local rows).
4391 - o_nnz - array containing the number of nonzeros in the various rows of the
4392           OFF-DIAGONAL portion of the local submatrix (possibly different for
4393           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4394           structure. The size of this array is equal to the number
4395           of local rows, i.e 'm'.
4396 
4397   Output Parameter:
4398 . A - the matrix
4399 
4400   Options Database Keys:
4401 + -mat_no_inode                     - Do not use inodes
4402 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4403 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4404                                       See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw, or binary cause the `VecScatter`
4405                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4406 
4407   Level: intermediate
4408 
4409   Notes:
4410   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4411   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4412   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4413 
4414   If the *_nnz parameter is given then the *_nz parameter is ignored
4415 
4416   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4417   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4418   storage requirements for this matrix.
4419 
4420   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4421   processor then it must be used on all processors that share the object for
4422   that argument.
4423 
4424   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4425   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4426 
4427   The user MUST specify either the local or global matrix dimensions
4428   (possibly both).
4429 
4430   The parallel matrix is partitioned across processors such that the
4431   first `m0` rows belong to process 0, the next `m1` rows belong to
4432   process 1, the next `m2` rows belong to process 2, etc., where
4433   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4434   values corresponding to an [m x N] submatrix.
4435 
4436   The columns are logically partitioned with the n0 columns belonging
4437   to the 0th partition, the next n1 columns belonging to the next
4438   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4439 
4440   The DIAGONAL portion of the local submatrix on any given processor
4441   is the submatrix corresponding to the rows and columns m,n owned by
4442   the given processor, i.e., the diagonal matrix on
4443   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4444   etc. The remaining portion of the local submatrix [m x (N-n)]
4445   constitute the OFF-DIAGONAL portion. The example below better
4446   illustrates this concept. The two matrices, the DIAGONAL portion and
4447   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4448 
4449   For a square global matrix we define each processor's diagonal portion
4450   to be its local rows and the corresponding columns (a square submatrix);
4451   each processor's off-diagonal portion encompasses the remainder of the
4452   local matrix (a rectangular submatrix).
4453 
4454   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4455 
4456   When calling this routine with a single process communicator, a matrix of
4457   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4458   type of communicator, use the construction mechanism
4459 .vb
4460   MatCreate(..., &A);
4461   MatSetType(A, MATMPIAIJ);
4462   MatSetSizes(A, m, n, M, N);
4463   MatMPIAIJSetPreallocation(A, ...);
4464 .ve
4465 
4466   By default, this format uses inodes (identical nodes) when possible.
4467   We search for consecutive rows with the same nonzero structure, thereby
4468   reusing matrix information to achieve increased efficiency.
4469 
4470   Example Usage:
4471   Consider the following 8x8 matrix with 34 non-zero values that is
4472   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4473   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4474   as follows
4475 
4476 .vb
4477             1  2  0  |  0  3  0  |  0  4
4478     Proc0   0  5  6  |  7  0  0  |  8  0
4479             9  0 10  | 11  0  0  | 12  0
4480     -------------------------------------
4481            13  0 14  | 15 16 17  |  0  0
4482     Proc1   0 18  0  | 19 20 21  |  0  0
4483             0  0  0  | 22 23  0  | 24  0
4484     -------------------------------------
4485     Proc2  25 26 27  |  0  0 28  | 29  0
4486            30  0  0  | 31 32 33  |  0 34
4487 .ve
4488 
4489   This can be represented as a collection of submatrices as
4490 
4491 .vb
4492       A B C
4493       D E F
4494       G H I
4495 .ve
4496 
4497   Where the submatrices A,B,C are owned by proc0, D,E,F are
4498   owned by proc1, G,H,I are owned by proc2.
4499 
4500   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4501   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4502   The 'M','N' parameters are 8,8, and have the same values on all procs.
4503 
4504   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4505   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4506   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4507   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4508   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4509   matrix, and [DF] as another `MATSEQAIJ` matrix.
4510 
4511   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4512   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4513   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4514   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4515   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4516   In this case, the values of `d_nz`,`o_nz` are
4517 .vb
4518      proc0  dnz = 2, o_nz = 2
4519      proc1  dnz = 3, o_nz = 2
4520      proc2  dnz = 1, o_nz = 4
4521 .ve
4522   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4523   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4524   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4525   34 values.
4526 
4527   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4528   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4529   In the above case the values for d_nnz,o_nnz are
4530 .vb
4531      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4532      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4533      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4534 .ve
4535   Here the space allocated is the sum of all the above values, i.e., 34, and
4536   hence the preallocation is perfect.
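
  As a minimal sketch using the simpler `d_nz`/`o_nz` upper bounds rather than exact per-row counts, one might write
.vb
  Mat A;

  /* let PETSc choose the row and column distribution of the 8x8 matrix; reserve at most
     3 diagonal-block and 4 off-diagonal-block nonzeros per row (upper bounds for this example) */
  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 8, 8, 3, NULL, 4, NULL, &A));
.ve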
4537 
4538 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4539           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4540           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4541 @*/
4542 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4543 {
4544   PetscMPIInt size;
4545 
4546   PetscFunctionBegin;
4547   PetscCall(MatCreate(comm, A));
4548   PetscCall(MatSetSizes(*A, m, n, M, N));
4549   PetscCallMPI(MPI_Comm_size(comm, &size));
4550   if (size > 1) {
4551     PetscCall(MatSetType(*A, MATMPIAIJ));
4552     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4553   } else {
4554     PetscCall(MatSetType(*A, MATSEQAIJ));
4555     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4556   }
4557   PetscFunctionReturn(PETSC_SUCCESS);
4558 }
4559 
4560 /*MC
4561     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4562 
4563     Synopsis:
4564     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4565 
4566     Not Collective
4567 
4568     Input Parameter:
4569 .   A - the `MATMPIAIJ` matrix
4570 
4571     Output Parameters:
4572 +   Ad - the diagonal portion of the matrix
4573 .   Ao - the off-diagonal portion of the matrix
4574 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4575 -   ierr - error code
4576 
4577      Level: advanced
4578 
4579     Note:
4580     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4581 
4582 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4583 M*/
4584 
4585 /*MC
4586     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4587 
4588     Synopsis:
4589     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4590 
4591     Not Collective
4592 
4593     Input Parameters:
4594 +   A - the `MATMPIAIJ` matrix
4595 .   Ad - the diagonal portion of the matrix
4596 .   Ao - the off-diagonal portion of the matrix
4597 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4598 -   ierr - error code
4599 
4600      Level: advanced
4601 
4602 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4603 M*/
4604 
4605 /*@C
4606   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4607 
4608   Not Collective
4609 
4610   Input Parameter:
4611 . A - The `MATMPIAIJ` matrix
4612 
4613   Output Parameters:
4614 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4615 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4616 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4617 
4618   Level: intermediate
4619 
4620   Note:
4621   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4622   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4623   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4624   local column numbers to global column numbers in the original matrix.
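
  For example, a short sketch of mapping the local columns of `Ao` back to global column numbers of a `MATMPIAIJ` matrix `A` is
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  /* local column c of Ao corresponds to global column colmap[c] of A */
.ve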
4625 
4626   Fortran Notes:
4627   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4628 
4629 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4630 @*/
4631 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4632 {
4633   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4634   PetscBool   flg;
4635 
4636   PetscFunctionBegin;
4637   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4638   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4639   if (Ad) *Ad = a->A;
4640   if (Ao) *Ao = a->B;
4641   if (colmap) *colmap = a->garray;
4642   PetscFunctionReturn(PETSC_SUCCESS);
4643 }
4644 
4645 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4646 {
4647   PetscInt     m, N, i, rstart, nnz, Ii;
4648   PetscInt    *indx;
4649   PetscScalar *values;
4650   MatType      rootType;
4651 
4652   PetscFunctionBegin;
4653   PetscCall(MatGetSize(inmat, &m, &N));
4654   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4655     PetscInt *dnz, *onz, sum, bs, cbs;
4656 
4657     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4658     /* Check sum(n) = N */
4659     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4660     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4661 
4662     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4663     rstart -= m;
4664 
4665     MatPreallocateBegin(comm, m, n, dnz, onz);
4666     for (i = 0; i < m; i++) {
4667       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4668       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4669       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4670     }
4671 
4672     PetscCall(MatCreate(comm, outmat));
4673     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4674     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4675     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4676     PetscCall(MatGetRootType_Private(inmat, &rootType));
4677     PetscCall(MatSetType(*outmat, rootType));
4678     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4679     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4680     MatPreallocateEnd(dnz, onz);
4681     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4682   }
4683 
4684   /* numeric phase */
4685   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4686   for (i = 0; i < m; i++) {
4687     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4688     Ii = i + rstart;
4689     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4690     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4691   }
4692   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4693   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4694   PetscFunctionReturn(PETSC_SUCCESS);
4695 }
4696 
4697 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4698 {
4699   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4700 
4701   PetscFunctionBegin;
4702   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4703   PetscCall(PetscFree(merge->id_r));
4704   PetscCall(PetscFree(merge->len_s));
4705   PetscCall(PetscFree(merge->len_r));
4706   PetscCall(PetscFree(merge->bi));
4707   PetscCall(PetscFree(merge->bj));
4708   PetscCall(PetscFree(merge->buf_ri[0]));
4709   PetscCall(PetscFree(merge->buf_ri));
4710   PetscCall(PetscFree(merge->buf_rj[0]));
4711   PetscCall(PetscFree(merge->buf_rj));
4712   PetscCall(PetscFree(merge->coi));
4713   PetscCall(PetscFree(merge->coj));
4714   PetscCall(PetscFree(merge->owners_co));
4715   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4716   PetscCall(PetscFree(merge));
4717   PetscFunctionReturn(PETSC_SUCCESS);
4718 }
4719 
4720 #include <../src/mat/utils/freespace.h>
4721 #include <petscbt.h>
4722 
4723 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4724 {
4725   MPI_Comm             comm;
4726   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4727   PetscMPIInt          size, rank, taga, *len_s;
4728   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4729   PetscMPIInt          proc, k;
4730   PetscInt           **buf_ri, **buf_rj;
4731   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4732   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4733   MPI_Request         *s_waits, *r_waits;
4734   MPI_Status          *status;
4735   const MatScalar     *aa, *a_a;
4736   MatScalar          **abuf_r, *ba_i;
4737   Mat_Merge_SeqsToMPI *merge;
4738   PetscContainer       container;
4739 
4740   PetscFunctionBegin;
4741   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4742   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4743 
4744   PetscCallMPI(MPI_Comm_size(comm, &size));
4745   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4746 
4747   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4748   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4749   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4750   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4751   aa = a_a;
4752 
4753   bi     = merge->bi;
4754   bj     = merge->bj;
4755   buf_ri = merge->buf_ri;
4756   buf_rj = merge->buf_rj;
4757 
4758   PetscCall(PetscMalloc1(size, &status));
4759   owners = merge->rowmap->range;
4760   len_s  = merge->len_s;
4761 
4762   /* send and recv matrix values */
4763   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4764   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4765 
4766   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4767   for (proc = 0, k = 0; proc < size; proc++) {
4768     if (!len_s[proc]) continue;
4769     i = owners[proc];
4770     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4771     k++;
4772   }
4773 
4774   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4775   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4776   PetscCall(PetscFree(status));
4777 
4778   PetscCall(PetscFree(s_waits));
4779   PetscCall(PetscFree(r_waits));
4780 
4781   /* insert mat values of mpimat */
4782   PetscCall(PetscMalloc1(N, &ba_i));
4783   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4784 
4785   for (k = 0; k < merge->nrecv; k++) {
4786     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4787     nrows       = *buf_ri_k[k];
4788     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4789     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4790   }
4791 
4792   /* set values of ba */
4793   m = merge->rowmap->n;
4794   for (i = 0; i < m; i++) {
4795     arow = owners[rank] + i;
4796     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4797     bnzi = bi[i + 1] - bi[i];
4798     PetscCall(PetscArrayzero(ba_i, bnzi));
4799 
4800     /* add local non-zero vals of this proc's seqmat into ba */
4801     anzi   = ai[arow + 1] - ai[arow];
4802     aj     = a->j + ai[arow];
4803     aa     = a_a + ai[arow];
4804     nextaj = 0;
4805     for (j = 0; nextaj < anzi; j++) {
4806       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4807         ba_i[j] += aa[nextaj++];
4808       }
4809     }
4810 
4811     /* add received vals into ba */
4812     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4813       /* i-th row */
4814       if (i == *nextrow[k]) {
4815         anzi   = *(nextai[k] + 1) - *nextai[k];
4816         aj     = buf_rj[k] + *nextai[k];
4817         aa     = abuf_r[k] + *nextai[k];
4818         nextaj = 0;
4819         for (j = 0; nextaj < anzi; j++) {
4820           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4821             ba_i[j] += aa[nextaj++];
4822           }
4823         }
4824         nextrow[k]++;
4825         nextai[k]++;
4826       }
4827     }
4828     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4829   }
4830   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4831   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4832   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4833 
4834   PetscCall(PetscFree(abuf_r[0]));
4835   PetscCall(PetscFree(abuf_r));
4836   PetscCall(PetscFree(ba_i));
4837   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4838   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4839   PetscFunctionReturn(PETSC_SUCCESS);
4840 }
4841 
4842 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4843 {
4844   Mat                  B_mpi;
4845   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4846   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4847   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4848   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4849   PetscInt             len, *dnz, *onz, bs, cbs;
4850   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4851   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4852   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4853   MPI_Status          *status;
4854   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4855   PetscBT              lnkbt;
4856   Mat_Merge_SeqsToMPI *merge;
4857   PetscContainer       container;
4858 
4859   PetscFunctionBegin;
4860   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4861 
4862   /* make sure it is a PETSc comm */
4863   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4864   PetscCallMPI(MPI_Comm_size(comm, &size));
4865   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4866 
4867   PetscCall(PetscNew(&merge));
4868   PetscCall(PetscMalloc1(size, &status));
4869 
4870   /* determine row ownership */
4871   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4872   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4873   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4874   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4875   PetscCall(PetscLayoutSetUp(merge->rowmap));
4876   PetscCall(PetscMalloc1(size, &len_si));
4877   PetscCall(PetscMalloc1(size, &merge->len_s));
4878 
4879   m      = merge->rowmap->n;
4880   owners = merge->rowmap->range;
4881 
4882   /* determine the number of messages to send, their lengths */
4883   len_s = merge->len_s;
4884 
4885   len          = 0; /* length of buf_si[] */
4886   merge->nsend = 0;
4887   for (PetscMPIInt proc = 0; proc < size; proc++) {
4888     len_si[proc] = 0;
4889     if (proc == rank) {
4890       len_s[proc] = 0;
4891     } else {
4892       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4893       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4894     }
4895     if (len_s[proc]) {
4896       merge->nsend++;
4897       nrows = 0;
4898       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4899         if (ai[i + 1] > ai[i]) nrows++;
4900       }
4901       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4902       len += len_si[proc];
4903     }
4904   }
4905 
4906   /* determine the number and length of messages to receive for ij-structure */
4907   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4908   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4909 
4910   /* post the Irecv of j-structure */
4911   PetscCall(PetscCommGetNewTag(comm, &tagj));
4912   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4913 
4914   /* post the Isend of j-structure */
4915   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4916 
4917   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4918     if (!len_s[proc]) continue;
4919     i = owners[proc];
4920     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4921     k++;
4922   }
4923 
4924   /* receives and sends of j-structure are complete */
4925   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4926   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4927 
4928   /* send and recv i-structure */
4929   PetscCall(PetscCommGetNewTag(comm, &tagi));
4930   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4931 
4932   PetscCall(PetscMalloc1(len + 1, &buf_s));
4933   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4934   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4935     if (!len_s[proc]) continue;
4936     /* form outgoing message for i-structure:
4937          buf_si[0]:                 nrows to be sent
4938                [1:nrows]:           row index (global)
4939                [nrows+1:2*nrows+1]: i-structure index
4940     */
4941     nrows       = len_si[proc] / 2 - 1;
4942     buf_si_i    = buf_si + nrows + 1;
4943     buf_si[0]   = nrows;
4944     buf_si_i[0] = 0;
4945     nrows       = 0;
4946     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4947       anzi = ai[i + 1] - ai[i];
4948       if (anzi) {
4949         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4950         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4951         nrows++;
4952       }
4953     }
4954     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4955     k++;
4956     buf_si += len_si[proc];
4957   }
4958 
4959   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4960   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4961 
4962   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4963   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4964 
4965   PetscCall(PetscFree(len_si));
4966   PetscCall(PetscFree(len_ri));
4967   PetscCall(PetscFree(rj_waits));
4968   PetscCall(PetscFree2(si_waits, sj_waits));
4969   PetscCall(PetscFree(ri_waits));
4970   PetscCall(PetscFree(buf_s));
4971   PetscCall(PetscFree(status));
4972 
4973   /* compute a local seq matrix in each processor */
4974   /* allocate bi array and free space for accumulating nonzero column info */
4975   PetscCall(PetscMalloc1(m + 1, &bi));
4976   bi[0] = 0;
4977 
4978   /* create and initialize a linked list */
4979   nlnk = N + 1;
4980   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4981 
4982   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4983   len = ai[owners[rank + 1]] - ai[owners[rank]];
4984   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4985 
4986   current_space = free_space;
4987 
4988   /* determine symbolic info for each local row */
4989   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4990 
4991   for (k = 0; k < merge->nrecv; k++) {
4992     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4993     nrows       = *buf_ri_k[k];
4994     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4995     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4996   }
4997 
4998   MatPreallocateBegin(comm, m, n, dnz, onz);
4999   len = 0;
5000   for (i = 0; i < m; i++) {
5001     bnzi = 0;
5002     /* add local non-zero cols of this proc's seqmat into lnk */
5003     arow = owners[rank] + i;
5004     anzi = ai[arow + 1] - ai[arow];
5005     aj   = a->j + ai[arow];
5006     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5007     bnzi += nlnk;
5008     /* add received col data into lnk */
5009     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5010       if (i == *nextrow[k]) {            /* i-th row */
5011         anzi = *(nextai[k] + 1) - *nextai[k];
5012         aj   = buf_rj[k] + *nextai[k];
5013         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5014         bnzi += nlnk;
5015         nextrow[k]++;
5016         nextai[k]++;
5017       }
5018     }
5019     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5020 
5021     /* if free space is not available, make more free space */
5022     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5023     /* copy data into free space, then initialize lnk */
5024     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5025     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5026 
5027     current_space->array += bnzi;
5028     current_space->local_used += bnzi;
5029     current_space->local_remaining -= bnzi;
5030 
5031     bi[i + 1] = bi[i] + bnzi;
5032   }
5033 
5034   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5035 
5036   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5037   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5038   PetscCall(PetscLLDestroy(lnk, lnkbt));
5039 
5040   /* create symbolic parallel matrix B_mpi */
5041   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5042   PetscCall(MatCreate(comm, &B_mpi));
5043   if (n == PETSC_DECIDE) {
5044     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5045   } else {
5046     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5047   }
5048   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5049   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5050   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5051   MatPreallocateEnd(dnz, onz);
5052   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5053 
5054   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5055   B_mpi->assembled = PETSC_FALSE;
5056   merge->bi        = bi;
5057   merge->bj        = bj;
5058   merge->buf_ri    = buf_ri;
5059   merge->buf_rj    = buf_rj;
5060   merge->coi       = NULL;
5061   merge->coj       = NULL;
5062   merge->owners_co = NULL;
5063 
5064   PetscCall(PetscCommDestroy(&comm));
5065 
5066   /* attach the supporting struct to B_mpi for reuse */
5067   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5068   PetscCall(PetscContainerSetPointer(container, merge));
5069   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5070   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5071   PetscCall(PetscContainerDestroy(&container));
5072   *mpimat = B_mpi;
5073 
5074   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5075   PetscFunctionReturn(PETSC_SUCCESS);
5076 }
5077 
5078 /*@
5079   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5080   matrices from each processor
5081 
5082   Collective
5083 
5084   Input Parameters:
5085 + comm   - the communicator the parallel matrix will live on
5086 . seqmat - the input sequential matrix on each MPI process
5087 . m      - number of local rows (or `PETSC_DECIDE`)
5088 . n      - number of local columns (or `PETSC_DECIDE`)
5089 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5090 
5091   Output Parameter:
5092 . mpimat - the parallel matrix generated
5093 
5094   Level: advanced
5095 
5096   Note:
5097   The dimensions of the sequential matrix in each processor MUST be the same.
5098   The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be
5099   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5100 
5101 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5102 @*/
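/*
   Example usage (an illustrative sketch, not part of this source): each rank builds its own
   SeqAIJ contribution "localmat" (assumed to exist, all with the same global dimensions), the
   contributions are summed into one parallel matrix, and the sum can later be refreshed with
   MAT_REUSE_MATRIX once the values (but not the nonzero pattern) of localmat change.

     Mat C;
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, localmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));
     // ... update the values of localmat, keeping the same nonzero pattern ...
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, localmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C));
     PetscCall(MatDestroy(&C));
*/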
5103 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5104 {
5105   PetscMPIInt size;
5106 
5107   PetscFunctionBegin;
5108   PetscCallMPI(MPI_Comm_size(comm, &size));
5109   if (size == 1) {
5110     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5111     if (scall == MAT_INITIAL_MATRIX) {
5112       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5113     } else {
5114       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5115     }
5116     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5117     PetscFunctionReturn(PETSC_SUCCESS);
5118   }
5119   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5120   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5121   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5122   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5123   PetscFunctionReturn(PETSC_SUCCESS);
5124 }
5125 
5126 /*@
5127   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5128 
5129   Not Collective
5130 
5131   Input Parameter:
5132 . A - the matrix
5133 
5134   Output Parameter:
5135 . A_loc - the local sequential matrix generated
5136 
5137   Level: developer
5138 
5139   Notes:
5140   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5141   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()` and
5142   `n` is the global column count obtained with `MatGetSize()`.
5143 
5144   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5145 
5146   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5147 
5148   Destroy the matrix with `MatDestroy()`
5149 
5150 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5151 @*/
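/*
   Example (a sketch, assuming "A" is an existing AIJ matrix): extract the local rows and release
   the result when done; for a sequential A this only increases the reference count.

     Mat A_loc;
     PetscCall(MatAIJGetLocalMat(A, &A_loc));
     // ... use A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/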
5152 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5153 {
5154   PetscBool mpi;
5155 
5156   PetscFunctionBegin;
5157   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5158   if (mpi) {
5159     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5160   } else {
5161     *A_loc = A;
5162     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5163   }
5164   PetscFunctionReturn(PETSC_SUCCESS);
5165 }
5166 
5167 /*@
5168   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5169 
5170   Not Collective
5171 
5172   Input Parameters:
5173 + A     - the matrix
5174 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5175 
5176   Output Parameter:
5177 . A_loc - the local sequential matrix generated
5178 
5179   Level: developer
5180 
5181   Notes:
5182   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5183   matrix with `mlocal` rows and `n` columns. `mlocal` is the local row count obtained with
5184   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5185 
5186   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5187 
5188   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5189   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5190   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5191   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5192 
5193 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5194 @*/
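/*
   Example (a sketch, assuming "A" is an existing MATMPIAIJ matrix): build the local matrix once,
   then refresh its values after A changes by reusing the same A_loc.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
     // ... the numerical values of A change, same nonzero pattern ...
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
     PetscCall(MatDestroy(&A_loc));
*/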
5195 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5196 {
5197   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5198   Mat_SeqAIJ        *mat, *a, *b;
5199   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5200   const PetscScalar *aa, *ba, *aav, *bav;
5201   PetscScalar       *ca, *cam;
5202   PetscMPIInt        size;
5203   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5204   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5205   PetscBool          match;
5206 
5207   PetscFunctionBegin;
5208   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5209   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5210   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5211   if (size == 1) {
5212     if (scall == MAT_INITIAL_MATRIX) {
5213       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5214       *A_loc = mpimat->A;
5215     } else if (scall == MAT_REUSE_MATRIX) {
5216       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5217     }
5218     PetscFunctionReturn(PETSC_SUCCESS);
5219   }
5220 
5221   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5222   a  = (Mat_SeqAIJ *)mpimat->A->data;
5223   b  = (Mat_SeqAIJ *)mpimat->B->data;
5224   ai = a->i;
5225   aj = a->j;
5226   bi = b->i;
5227   bj = b->j;
5228   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5229   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5230   aa = aav;
5231   ba = bav;
5232   if (scall == MAT_INITIAL_MATRIX) {
5233     PetscCall(PetscMalloc1(1 + am, &ci));
5234     ci[0] = 0;
5235     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5236     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5237     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5238     k = 0;
5239     for (i = 0; i < am; i++) {
5240       ncols_o = bi[i + 1] - bi[i];
5241       ncols_d = ai[i + 1] - ai[i];
5242       /* off-diagonal portion of A */
5243       for (jo = 0; jo < ncols_o; jo++) {
5244         col = cmap[*bj];
5245         if (col >= cstart) break;
5246         cj[k] = col;
5247         bj++;
5248         ca[k++] = *ba++;
5249       }
5250       /* diagonal portion of A */
5251       for (j = 0; j < ncols_d; j++) {
5252         cj[k]   = cstart + *aj++;
5253         ca[k++] = *aa++;
5254       }
5255       /* off-diagonal portion of A */
5256       for (j = jo; j < ncols_o; j++) {
5257         cj[k]   = cmap[*bj++];
5258         ca[k++] = *ba++;
5259       }
5260     }
5261     /* put together the new matrix */
5262     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5263     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5264     /* Since these are PETSc arrays, change flags to free them as necessary. */
5265     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5266     mat->free_a  = PETSC_TRUE;
5267     mat->free_ij = PETSC_TRUE;
5268     mat->nonew   = 0;
5269   } else if (scall == MAT_REUSE_MATRIX) {
5270     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5271     ci  = mat->i;
5272     cj  = mat->j;
5273     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5274     for (i = 0; i < am; i++) {
5275       /* off-diagonal portion of A */
5276       ncols_o = bi[i + 1] - bi[i];
5277       for (jo = 0; jo < ncols_o; jo++) {
5278         col = cmap[*bj];
5279         if (col >= cstart) break;
5280         *cam++ = *ba++;
5281         bj++;
5282       }
5283       /* diagonal portion of A */
5284       ncols_d = ai[i + 1] - ai[i];
5285       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5286       /* off-diagonal portion of A */
5287       for (j = jo; j < ncols_o; j++) {
5288         *cam++ = *ba++;
5289         bj++;
5290       }
5291     }
5292     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5293   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5294   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5295   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5296   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5297   PetscFunctionReturn(PETSC_SUCCESS);
5298 }
5299 
5300 /*@
5301   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5302   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5303 
5304   Not Collective
5305 
5306   Input Parameters:
5307 + A     - the matrix
5308 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5309 
5310   Output Parameters:
5311 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5312 - A_loc - the local sequential matrix generated
5313 
5314   Level: developer
5315 
5316   Note:
5317   This is different from `MatMPIAIJGetLocalMat()` since the first columns of the returned matrix are those associated with the diagonal
5318   part, followed by those associated with the off-diagonal part (in its local ordering)
5319 
5320 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5321 @*/
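/*
   Example (a sketch, assuming "A" is an existing MATMPIAIJ matrix): the diagonal-block columns come
   first, then the off-diagonal ones, and "glob" maps the local column numbering back to global indices.

     Mat A_loc;
     IS  glob;
     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
     // ... use A_loc together with glob ...
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/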
5322 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5323 {
5324   Mat             Ao, Ad;
5325   const PetscInt *cmap;
5326   PetscMPIInt     size;
5327   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5328 
5329   PetscFunctionBegin;
5330   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5331   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5332   if (size == 1) {
5333     if (scall == MAT_INITIAL_MATRIX) {
5334       PetscCall(PetscObjectReference((PetscObject)Ad));
5335       *A_loc = Ad;
5336     } else if (scall == MAT_REUSE_MATRIX) {
5337       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5338     }
5339     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5340     PetscFunctionReturn(PETSC_SUCCESS);
5341   }
5342   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5343   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5344   if (f) {
5345     PetscCall((*f)(A, scall, glob, A_loc));
5346   } else {
5347     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5348     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5349     Mat_SeqAIJ        *c;
5350     PetscInt          *ai = a->i, *aj = a->j;
5351     PetscInt          *bi = b->i, *bj = b->j;
5352     PetscInt          *ci, *cj;
5353     const PetscScalar *aa, *ba;
5354     PetscScalar       *ca;
5355     PetscInt           i, j, am, dn, on;
5356 
5357     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5358     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5359     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5360     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5361     if (scall == MAT_INITIAL_MATRIX) {
5362       PetscInt k;
5363       PetscCall(PetscMalloc1(1 + am, &ci));
5364       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5365       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5366       ci[0] = 0;
5367       for (i = 0, k = 0; i < am; i++) {
5368         const PetscInt ncols_o = bi[i + 1] - bi[i];
5369         const PetscInt ncols_d = ai[i + 1] - ai[i];
5370         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5371         /* diagonal portion of A */
5372         for (j = 0; j < ncols_d; j++, k++) {
5373           cj[k] = *aj++;
5374           ca[k] = *aa++;
5375         }
5376         /* off-diagonal portion of A */
5377         for (j = 0; j < ncols_o; j++, k++) {
5378           cj[k] = dn + *bj++;
5379           ca[k] = *ba++;
5380         }
5381       }
5382       /* put together the new matrix */
5383       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5384       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5385       /* Since these are PETSc arrays, change flags to free them as necessary. */
5386       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5387       c->free_a  = PETSC_TRUE;
5388       c->free_ij = PETSC_TRUE;
5389       c->nonew   = 0;
5390       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5391     } else if (scall == MAT_REUSE_MATRIX) {
5392       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5393       for (i = 0; i < am; i++) {
5394         const PetscInt ncols_d = ai[i + 1] - ai[i];
5395         const PetscInt ncols_o = bi[i + 1] - bi[i];
5396         /* diagonal portion of A */
5397         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5398         /* off-diagonal portion of A */
5399         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5400       }
5401       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5402     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5403     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5404     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5405     if (glob) {
5406       PetscInt cst, *gidx;
5407 
5408       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5409       PetscCall(PetscMalloc1(dn + on, &gidx));
5410       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5411       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5412       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5413     }
5414   }
5415   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5416   PetscFunctionReturn(PETSC_SUCCESS);
5417 }
5418 
5419 /*@C
5420   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5421 
5422   Not Collective
5423 
5424   Input Parameters:
5425 + A     - the matrix
5426 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5427 . row   - index set of rows to extract (or `NULL`)
5428 - col   - index set of columns to extract (or `NULL`)
5429 
5430   Output Parameter:
5431 . A_loc - the local sequential matrix generated
5432 
5433   Level: developer
5434 
5435 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5436 @*/
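/*
   Example (a sketch, assuming "A" is an existing MATMPIAIJ matrix): passing NULL for both index sets
   takes all local rows and only the columns that actually contain nonzeros.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
     // ... use A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/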
5437 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5438 {
5439   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5440   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5441   IS          isrowa, iscola;
5442   Mat        *aloc;
5443   PetscBool   match;
5444 
5445   PetscFunctionBegin;
5446   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5447   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5448   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5449   if (!row) {
5450     start = A->rmap->rstart;
5451     end   = A->rmap->rend;
5452     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5453   } else {
5454     isrowa = *row;
5455   }
5456   if (!col) {
5457     start = A->cmap->rstart;
5458     cmap  = a->garray;
5459     nzA   = a->A->cmap->n;
5460     nzB   = a->B->cmap->n;
5461     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5462     ncols = 0;
5463     for (i = 0; i < nzB; i++) {
5464       if (cmap[i] < start) idx[ncols++] = cmap[i];
5465       else break;
5466     }
5467     imark = i;
5468     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5469     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5470     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5471   } else {
5472     iscola = *col;
5473   }
5474   if (scall != MAT_INITIAL_MATRIX) {
5475     PetscCall(PetscMalloc1(1, &aloc));
5476     aloc[0] = *A_loc;
5477   }
5478   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5479   if (!col) { /* attach global id of condensed columns */
5480     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5481   }
5482   *A_loc = aloc[0];
5483   PetscCall(PetscFree(aloc));
5484   if (!row) PetscCall(ISDestroy(&isrowa));
5485   if (!col) PetscCall(ISDestroy(&iscola));
5486   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5487   PetscFunctionReturn(PETSC_SUCCESS);
5488 }
5489 
5490 /*
5491  * Create a sequential AIJ matrix based on row indices; a whole row is extracted once a row index is matched.
5492  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5493  * on a global size.
5494  * */
5495 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5496 {
5497   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5498   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5499   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5500   PetscMPIInt            owner;
5501   PetscSFNode           *iremote, *oiremote;
5502   const PetscInt        *lrowindices;
5503   PetscSF                sf, osf;
5504   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5505   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5506   MPI_Comm               comm;
5507   ISLocalToGlobalMapping mapping;
5508   const PetscScalar     *pd_a, *po_a;
5509 
5510   PetscFunctionBegin;
5511   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5512   /* plocalsize is the number of roots
5513    * nrows is the number of leaves
5514    * */
5515   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5516   PetscCall(ISGetLocalSize(rows, &nrows));
5517   PetscCall(PetscCalloc1(nrows, &iremote));
5518   PetscCall(ISGetIndices(rows, &lrowindices));
5519   for (i = 0; i < nrows; i++) {
5520     /* Find a remote index and an owner for a row
5521      * The row could be local or remote
5522      * */
5523     owner = 0;
5524     lidx  = 0;
5525     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5526     iremote[i].index = lidx;
5527     iremote[i].rank  = owner;
5528   }
5529   /* Create SF to communicate how many nonzero columns for each row */
5530   PetscCall(PetscSFCreate(comm, &sf));
5531   /* SF will figure out the number of nonzero columns for each row, and their
5532    * offsets
5533    * */
5534   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5535   PetscCall(PetscSFSetFromOptions(sf));
5536   PetscCall(PetscSFSetUp(sf));
5537 
5538   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5539   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5540   PetscCall(PetscCalloc1(nrows, &pnnz));
5541   roffsets[0] = 0;
5542   roffsets[1] = 0;
5543   for (i = 0; i < plocalsize; i++) {
5544     /* diagonal */
5545     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5546     /* off-diagonal */
5547     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5548     /* compute offsets so that we know the relative location of each row */
5549     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5550     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5551   }
5552   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5553   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5554   /* 'r' means root, and 'l' means leaf */
5555   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5556   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5557   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5558   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5559   PetscCall(PetscSFDestroy(&sf));
5560   PetscCall(PetscFree(roffsets));
5561   PetscCall(PetscFree(nrcols));
5562   dntotalcols = 0;
5563   ontotalcols = 0;
5564   ncol        = 0;
5565   for (i = 0; i < nrows; i++) {
5566     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5567     ncol    = PetscMax(pnnz[i], ncol);
5568     /* diagonal */
5569     dntotalcols += nlcols[i * 2 + 0];
5570     /* off-diagonal */
5571     ontotalcols += nlcols[i * 2 + 1];
5572   }
5573   /* We do not need to figure out the exact number of columns
5574    * since all the calculations will be done by going through the raw data
5575    * */
5576   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5577   PetscCall(MatSetUp(*P_oth));
5578   PetscCall(PetscFree(pnnz));
5579   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5580   /* diagonal */
5581   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5582   /* off-diagonal */
5583   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5584   /* diagonal */
5585   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5586   /* off-diagonal */
5587   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5588   dntotalcols = 0;
5589   ontotalcols = 0;
5590   ntotalcols  = 0;
5591   for (i = 0; i < nrows; i++) {
5592     owner = 0;
5593     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5594     /* Set iremote for diag matrix */
5595     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5596       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5597       iremote[dntotalcols].rank  = owner;
5598       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5599       ilocal[dntotalcols++] = ntotalcols++;
5600     }
5601     /* off-diagonal */
5602     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5603       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5604       oiremote[ontotalcols].rank  = owner;
5605       oilocal[ontotalcols++]      = ntotalcols++;
5606     }
5607   }
5608   PetscCall(ISRestoreIndices(rows, &lrowindices));
5609   PetscCall(PetscFree(loffsets));
5610   PetscCall(PetscFree(nlcols));
5611   PetscCall(PetscSFCreate(comm, &sf));
5612   /* P serves as roots and P_oth is leaves
5613    * Diag matrix
5614    * */
5615   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5616   PetscCall(PetscSFSetFromOptions(sf));
5617   PetscCall(PetscSFSetUp(sf));
5618 
5619   PetscCall(PetscSFCreate(comm, &osf));
5620   /* off-diagonal */
5621   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5622   PetscCall(PetscSFSetFromOptions(osf));
5623   PetscCall(PetscSFSetUp(osf));
5624   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5625   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5626   /* operate on the matrix internal data to save memory */
5627   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5628   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5629   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5630   /* Convert to global indices for diag matrix */
5631   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5632   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5633   /* We want P_oth to store global indices */
5634   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5635   /* Use memory scalable approach */
5636   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5637   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5638   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5639   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5640   /* Convert back to local indices */
5641   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5642   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5643   nout = 0;
5644   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5645   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5646   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5647   /* Exchange values */
5648   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5649   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5650   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5651   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5652   /* Stop PETSc from shrinking memory */
5653   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5654   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5655   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5656   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5657   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5658   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5659   PetscCall(PetscSFDestroy(&sf));
5660   PetscCall(PetscSFDestroy(&osf));
5661   PetscFunctionReturn(PETSC_SUCCESS);
5662 }
5663 
5664 /*
5665  * Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
5666  * This supports MPIAIJ and MAIJ
5667  * */
5668 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5669 {
5670   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5671   Mat_SeqAIJ *p_oth;
5672   IS          rows, map;
5673   PetscHMapI  hamp;
5674   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5675   MPI_Comm    comm;
5676   PetscSF     sf, osf;
5677   PetscBool   has;
5678 
5679   PetscFunctionBegin;
5680   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5681   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5682   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5683    *  and then create a submatrix (that often is an overlapping matrix)
5684    * */
5685   if (reuse == MAT_INITIAL_MATRIX) {
5686     /* Use a hash table to figure out unique keys */
5687     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5688     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5689     count = 0;
5690     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5691     for (i = 0; i < a->B->cmap->n; i++) {
5692       key = a->garray[i] / dof;
5693       PetscCall(PetscHMapIHas(hamp, key, &has));
5694       if (!has) {
5695         mapping[i] = count;
5696         PetscCall(PetscHMapISet(hamp, key, count++));
5697       } else {
5698         /* Current 'i' has the same key as the previous step */
5699         mapping[i] = count - 1;
5700       }
5701     }
5702     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5703     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5704     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, "Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5705     PetscCall(PetscCalloc1(htsize, &rowindices));
5706     off = 0;
5707     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5708     PetscCall(PetscHMapIDestroy(&hamp));
5709     PetscCall(PetscSortInt(htsize, rowindices));
5710     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5711     /* In case the matrix was already created but the user wants to recreate it */
5712     PetscCall(MatDestroy(P_oth));
5713     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5714     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5715     PetscCall(ISDestroy(&map));
5716     PetscCall(ISDestroy(&rows));
5717   } else if (reuse == MAT_REUSE_MATRIX) {
5718     /* If matrix was already created, we simply update values using SF objects
5719      * that were attached to the matrix earlier.
5720      */
5721     const PetscScalar *pd_a, *po_a;
5722 
5723     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5724     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5725     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5726     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5727     /* Update values in place */
5728     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5729     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5730     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5731     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5732     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5733     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5734     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5735     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5736   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5737   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5738   PetscFunctionReturn(PETSC_SUCCESS);
5739 }
5740 
5741 /*@C
5742   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` corresponding to nonzero columns of local `A`
5743 
5744   Collective
5745 
5746   Input Parameters:
5747 + A     - the first matrix in `MATMPIAIJ` format
5748 . B     - the second matrix in `MATMPIAIJ` format
5749 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5750 
5751   Output Parameters:
5752 + rowb  - On input, index set of rows of `B` to extract (or `NULL`); modified on output
5753 . colb  - On input, index set of columns of `B` to extract (or `NULL`); modified on output
5754 - B_seq - the sequential matrix generated
5755 
5756   Level: developer
5757 
5758 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5759 @*/
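/*
   Example (a sketch, assuming "A" and "B" are existing MATMPIAIJ matrices with compatible layouts):
   the index sets produced by the first call are fed back in when only the values of B change.

     IS  rowb = NULL, colb = NULL;
     Mat B_seq;
     PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
     // ... the values of B change, same nonzero pattern ...
     PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
     PetscCall(ISDestroy(&rowb));
     PetscCall(ISDestroy(&colb));
     PetscCall(MatDestroy(&B_seq));
*/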
5760 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5761 {
5762   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5763   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5764   IS          isrowb, iscolb;
5765   Mat        *bseq = NULL;
5766 
5767   PetscFunctionBegin;
5768   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5769              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5770   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5771 
5772   if (scall == MAT_INITIAL_MATRIX) {
5773     start = A->cmap->rstart;
5774     cmap  = a->garray;
5775     nzA   = a->A->cmap->n;
5776     nzB   = a->B->cmap->n;
5777     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5778     ncols = 0;
5779     for (i = 0; i < nzB; i++) { /* row < local row index */
5780       if (cmap[i] < start) idx[ncols++] = cmap[i];
5781       else break;
5782     }
5783     imark = i;
5784     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5785     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5786     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5787     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5788   } else {
5789     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5790     isrowb = *rowb;
5791     iscolb = *colb;
5792     PetscCall(PetscMalloc1(1, &bseq));
5793     bseq[0] = *B_seq;
5794   }
5795   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5796   *B_seq = bseq[0];
5797   PetscCall(PetscFree(bseq));
5798   if (!rowb) {
5799     PetscCall(ISDestroy(&isrowb));
5800   } else {
5801     *rowb = isrowb;
5802   }
5803   if (!colb) {
5804     PetscCall(ISDestroy(&iscolb));
5805   } else {
5806     *colb = iscolb;
5807   }
5808   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5809   PetscFunctionReturn(PETSC_SUCCESS);
5810 }
5811 
5812 /*
5813     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to nonzero columns
5814     of the OFF-DIAGONAL portion of local A
5815 
5816     Collective
5817 
5818    Input Parameters:
5819 +    A,B - the matrices in `MATMPIAIJ` format
5820 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5821 
5822    Output Parameters:
5823 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5824 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5825 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5826 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5827 
5828     Developer Note:
5829     This directly accesses information inside the VecScatter associated with the matrix-vector product
5830     for this matrix. This is not desirable.
5831 
5832     Level: developer
5833 
5834 */
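/*
   Example (a sketch, assuming "A" and "B" are existing MATMPIAIJ matrices with compatible layouts):
   the first call allocates the communication buffers, which are handed back on reuse.

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth;
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     // ... the values of B change, same nonzero pattern ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
*/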
5835 
5836 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5837 {
5838   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5839   VecScatter         ctx;
5840   MPI_Comm           comm;
5841   const PetscMPIInt *rprocs, *sprocs;
5842   PetscMPIInt        nrecvs, nsends;
5843   const PetscInt    *srow, *rstarts, *sstarts;
5844   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5845   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5846   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5847   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5848   PetscMPIInt        size, tag, rank, nreqs;
5849 
5850   PetscFunctionBegin;
5851   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5852   PetscCallMPI(MPI_Comm_size(comm, &size));
5853 
5854   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5855              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5856   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5857   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5858 
5859   if (size == 1) {
5860     startsj_s = NULL;
5861     bufa_ptr  = NULL;
5862     *B_oth    = NULL;
5863     PetscFunctionReturn(PETSC_SUCCESS);
5864   }
5865 
5866   ctx = a->Mvctx;
5867   tag = ((PetscObject)ctx)->tag;
5868 
5869   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5870   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5871   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5872   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5873   PetscCall(PetscMalloc1(nreqs, &reqs));
5874   rwaits = reqs;
5875   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5876 
5877   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5878   if (scall == MAT_INITIAL_MATRIX) {
5879     /* i-array */
5880     /*  post receives */
5881     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5882     for (i = 0; i < nrecvs; i++) {
5883       rowlen = rvalues + rstarts[i] * rbs;
5884       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5885       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5886     }
5887 
5888     /* pack the outgoing message */
5889     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5890 
5891     sstartsj[0] = 0;
5892     rstartsj[0] = 0;
5893     len         = 0; /* total length of j or a array to be sent */
5894     if (nsends) {
5895       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5896       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5897     }
5898     for (i = 0; i < nsends; i++) {
5899       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5900       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5901       for (j = 0; j < nrows; j++) {
5902         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5903         for (l = 0; l < sbs; l++) {
5904           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5905 
5906           rowlen[j * sbs + l] = ncols;
5907 
5908           len += ncols;
5909           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5910         }
5911         k++;
5912       }
5913       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5914 
5915       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5916     }
5917     /* recvs and sends of i-array are completed */
5918     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5919     PetscCall(PetscFree(svalues));
5920 
5921     /* allocate buffers for sending j and a arrays */
5922     PetscCall(PetscMalloc1(len + 1, &bufj));
5923     PetscCall(PetscMalloc1(len + 1, &bufa));
5924 
5925     /* create i-array of B_oth */
5926     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5927 
5928     b_othi[0] = 0;
5929     len       = 0; /* total length of j or a array to be received */
5930     k         = 0;
5931     for (i = 0; i < nrecvs; i++) {
5932       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5933       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5934       for (j = 0; j < nrows; j++) {
5935         b_othi[k + 1] = b_othi[k] + rowlen[j];
5936         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5937         k++;
5938       }
5939       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5940     }
5941     PetscCall(PetscFree(rvalues));
5942 
5943     /* allocate space for j and a arrays of B_oth */
5944     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5945     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5946 
5947     /* j-array */
5948     /*  post receives of j-array */
5949     for (i = 0; i < nrecvs; i++) {
5950       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5951       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5952     }
5953 
5954     /* pack the outgoing message j-array */
5955     if (nsends) k = sstarts[0];
5956     for (i = 0; i < nsends; i++) {
5957       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5958       bufJ  = bufj + sstartsj[i];
5959       for (j = 0; j < nrows; j++) {
5960         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5961         for (ll = 0; ll < sbs; ll++) {
5962           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5963           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5964           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5965         }
5966       }
5967       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5968     }
5969 
5970     /* recvs and sends of j-array are completed */
5971     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5972   } else if (scall == MAT_REUSE_MATRIX) {
5973     sstartsj = *startsj_s;
5974     rstartsj = *startsj_r;
5975     bufa     = *bufa_ptr;
5976     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5977   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5978 
5979   /* a-array */
5980   /*  post receives of a-array */
5981   for (i = 0; i < nrecvs; i++) {
5982     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5983     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5984   }
5985 
5986   /* pack the outgoing message a-array */
5987   if (nsends) k = sstarts[0];
5988   for (i = 0; i < nsends; i++) {
5989     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5990     bufA  = bufa + sstartsj[i];
5991     for (j = 0; j < nrows; j++) {
5992       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5993       for (ll = 0; ll < sbs; ll++) {
5994         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5995         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5996         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5997       }
5998     }
5999     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6000   }
6001   /* recvs and sends of a-array are completed */
6002   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6003   PetscCall(PetscFree(reqs));
6004 
6005   if (scall == MAT_INITIAL_MATRIX) {
6006     Mat_SeqAIJ *b_oth;
6007 
6008     /* put together the new matrix */
6009     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6010 
6011     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6012     /* Since these are PETSc arrays, change flags to free them as necessary. */
6013     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6014     b_oth->free_a  = PETSC_TRUE;
6015     b_oth->free_ij = PETSC_TRUE;
6016     b_oth->nonew   = 0;
6017 
6018     PetscCall(PetscFree(bufj));
6019     if (!startsj_s || !bufa_ptr) {
6020       PetscCall(PetscFree2(sstartsj, rstartsj));
6021       PetscCall(PetscFree(bufa_ptr));
6022     } else {
6023       *startsj_s = sstartsj;
6024       *startsj_r = rstartsj;
6025       *bufa_ptr  = bufa;
6026     }
6027   } else if (scall == MAT_REUSE_MATRIX) {
6028     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6029   }
6030 
6031   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6032   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6033   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6034   PetscFunctionReturn(PETSC_SUCCESS);
6035 }
6036 
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6040 #if defined(PETSC_HAVE_MKL_SPARSE)
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6045 #if defined(PETSC_HAVE_ELEMENTAL)
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 #if defined(PETSC_HAVE_SCALAPACK)
6049 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 #if defined(PETSC_HAVE_HYPRE)
6052 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6053 #endif
6054 #if defined(PETSC_HAVE_CUDA)
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 #if defined(PETSC_HAVE_HIP)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6064 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6065 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6066 
6067 /*
6068     Computes (B'*A')' since computing B*A directly is untenable
6069 
6070                n                       p                          p
6071         [             ]       [             ]         [                 ]
6072       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6073         [             ]       [             ]         [                 ]
6074 
6075 */
6076 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6077 {
6078   Mat At, Bt, Ct;
6079 
6080   PetscFunctionBegin;
6081   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6082   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6083   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6084   PetscCall(MatDestroy(&At));
6085   PetscCall(MatDestroy(&Bt));
6086   PetscCall(MatTransposeSetPrecursor(Ct, C));
6087   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6088   PetscCall(MatDestroy(&Ct));
6089   PetscFunctionReturn(PETSC_SUCCESS);
6090 }
6091 
6092 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6093 {
6094   PetscBool cisdense;
6095 
6096   PetscFunctionBegin;
6097   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6098   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6099   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6100   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6101   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6102   PetscCall(MatSetUp(C));
6103 
6104   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6105   PetscFunctionReturn(PETSC_SUCCESS);
6106 }
6107 
6108 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6109 {
6110   Mat_Product *product = C->product;
6111   Mat          A = product->A, B = product->B;
6112 
6113   PetscFunctionBegin;
6114   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6115              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6116   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6117   C->ops->productsymbolic = MatProductSymbolic_AB;
6118   PetscFunctionReturn(PETSC_SUCCESS);
6119 }
6120 
6121 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6122 {
6123   Mat_Product *product = C->product;
6124 
6125   PetscFunctionBegin;
6126   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6127   PetscFunctionReturn(PETSC_SUCCESS);
6128 }
6129 
6130 /*
6131    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6132 
6133   Input Parameters:
6134 
6135     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6136     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6137 
6138     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6139 
6140     For Set1, j1[] contains column indices of the nonzeros.
6141     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6142     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6143     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6144 
6145     Similar for Set2.
6146 
6147     This routine merges the two sets of nonzeros row by row and removes repeats.
6148 
6149   Output Parameters: (memory is allocated by the caller)
6150 
6151     i[],j[]: the CSR of the merged matrix, which has m rows.
6152     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6153     imap2[]: similar to imap1[], but for Set2.
6154     Note we order nonzeros row-by-row and from left to right.
6155 */
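/*
   Illustration (not from this source): suppose row r has Set1 columns j1 = {1,3,3,7} with
   jmap1 = {0,1,3,4} (column 3 appears twice) and Set2 columns j2 = {3,5} with jmap2 = {0,1,2}.
   The merged row is j = {1,3,5,7}; the unique Set1 entries map to merged positions
   imap1 = {t, t+1, t+3} and the unique Set2 entries to imap2 = {t+1, t+2}, where t is the
   running count of merged nonzeros before this row.
*/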
6156 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6157 {
6158   PetscInt   r, m; /* Row index of mat */
6159   PetscCount t, t1, t2, b1, e1, b2, e2;
6160 
6161   PetscFunctionBegin;
6162   PetscCall(MatGetLocalSize(mat, &m, NULL));
6163   t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6164   i[0]        = 0;
6165   for (r = 0; r < m; r++) { /* Do row by row merging */
6166     b1 = rowBegin1[r];
6167     e1 = rowEnd1[r];
6168     b2 = rowBegin2[r];
6169     e2 = rowEnd2[r];
6170     while (b1 < e1 && b2 < e2) {
6171       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6172         j[t]      = j1[b1];
6173         imap1[t1] = t;
6174         imap2[t2] = t;
6175         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique nonzero in Set1 */
6176         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique nonzero in Set2 */
6177         t1++;
6178         t2++;
6179         t++;
6180       } else if (j1[b1] < j2[b2]) {
6181         j[t]      = j1[b1];
6182         imap1[t1] = t;
6183         b1 += jmap1[t1 + 1] - jmap1[t1];
6184         t1++;
6185         t++;
6186       } else {
6187         j[t]      = j2[b2];
6188         imap2[t2] = t;
6189         b2 += jmap2[t2 + 1] - jmap2[t2];
6190         t2++;
6191         t++;
6192       }
6193     }
6194     /* Merge the remaining in either j1[] or j2[] */
6195     while (b1 < e1) {
6196       j[t]      = j1[b1];
6197       imap1[t1] = t;
6198       b1 += jmap1[t1 + 1] - jmap1[t1];
6199       t1++;
6200       t++;
6201     }
6202     while (b2 < e2) {
6203       j[t]      = j2[b2];
6204       imap2[t2] = t;
6205       b2 += jmap2[t2 + 1] - jmap2[t2];
6206       t2++;
6207       t++;
6208     }
6209     PetscCall(PetscIntCast(t, i + r + 1));
6210   }
6211   PetscFunctionReturn(PETSC_SUCCESS);
6212 }
6213 
6214 /*
6215   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6216 
6217   Input Parameters:
6218     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6219     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6220       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6221 
6222       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6223       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6224 
6225   Output Parameters:
6226     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6227     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6228       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6229       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6230 
6231     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6232       Atot: number of entries belonging to the diagonal block.
6233       Annz: number of unique nonzeros belonging to the diagonal block.
6234       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6235         repeats (i.e., same 'i,j' pair).
6236       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6237         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6241 
6242     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6243 
6244     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6245 */
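/*
   Illustration (not from this source): with cstart = 10 and cend = 20, a row with entries at
   columns {25,12,12,3} is sorted and split so that [rowBegin,rowMid) covers the diagonal-block
   entries {12,12} and [rowMid,rowEnd) covers the off-diagonal entries {3,25}; this row then
   contributes 2 to Atot, 1 to Annz, 2 to Btot and 2 to Bnnz.
*/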
6246 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6247 {
6248   PetscInt    cstart, cend, rstart, rend, row, col;
6249   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6250   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6251   PetscCount  k, m, p, q, r, s, mid;
6252   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6253 
6254   PetscFunctionBegin;
6255   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6256   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6257   m = rend - rstart;
6258 
6259   /* Skip negative rows */
6260   for (k = 0; k < n; k++)
6261     if (i[k] >= 0) break;
6262 
6263   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6264      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6265   */
6266   while (k < n) {
6267     row = i[k];
6268     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6269     for (s = k; s < n; s++)
6270       if (i[s] != row) break;
6271 
6272     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6273     for (p = k; p < s; p++) {
6274       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6275       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6276     }
6277     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6278     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6279     rowBegin[row - rstart] = k;
6280     rowMid[row - rstart]   = mid;
6281     rowEnd[row - rstart]   = s;
6282 
6283     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6284     Atot += mid - k;
6285     Btot += s - mid;
6286 
6287     /* Count unique nonzeros of this diag row */
6288     for (p = k; p < mid;) {
6289       col = j[p];
6290       do {
6291         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6292         p++;
6293       } while (p < mid && j[p] == col);
6294       Annz++;
6295     }
6296 
6297     /* Count unique nonzeros of this offdiag row */
6298     for (p = mid; p < s;) {
6299       col = j[p];
6300       do {
6301         p++;
6302       } while (p < s && j[p] == col);
6303       Bnnz++;
6304     }
6305     k = s;
6306   }
6307 
6308   /* Allocation according to Atot, Btot, Annz, Bnnz */
6309   PetscCall(PetscMalloc1(Atot, &Aperm));
6310   PetscCall(PetscMalloc1(Btot, &Bperm));
6311   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6312   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6313 
6314   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6315   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6316   for (r = 0; r < m; r++) {
6317     k   = rowBegin[r];
6318     mid = rowMid[r];
6319     s   = rowEnd[r];
6320     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6321     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6322     Atot += mid - k;
6323     Btot += s - mid;
6324 
6325     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6326     for (p = k; p < mid;) {
6327       col = j[p];
6328       q   = p;
6329       do {
6330         p++;
6331       } while (p < mid && j[p] == col);
6332       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6333       Annz++;
6334     }
6335 
6336     for (p = mid; p < s;) {
6337       col = j[p];
6338       q   = p;
6339       do {
6340         p++;
6341       } while (p < s && j[p] == col);
6342       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6343       Bnnz++;
6344     }
6345   }
6346   /* Output */
6347   *Aperm_ = Aperm;
6348   *Annz_  = Annz;
6349   *Atot_  = Atot;
6350   *Ajmap_ = Ajmap;
6351   *Bperm_ = Bperm;
6352   *Bnnz_  = Bnnz;
6353   *Btot_  = Btot;
6354   *Bjmap_ = Bjmap;
6355   PetscFunctionReturn(PETSC_SUCCESS);
6356 }
6357 
6358 /*
6359   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6360 
6361   Input Parameters:
6362     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6363     nnz:  number of unique nonzeros in the merged matrix
6364     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6365     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6366 
6367   Output Parameter: (memory is allocated by the caller)
6368     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6369 
6370   Example:
6371     nnz1 = 4
6372     nnz  = 6
6373     imap = [1,3,4,5]
6374     jmap = [0,3,5,6,7]
6375    then,
6376     jmap_new = [0,0,3,3,5,6,7]
6377 */
6378 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6379 {
6380   PetscCount k, p;
6381 
6382   PetscFunctionBegin;
6383   jmap_new[0] = 0;
6384   p           = nnz;                /* p loops over jmap_new[] backwards */
6385   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6386     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6387   }
6388   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6389   PetscFunctionReturn(PETSC_SUCCESS);
6390 }
6391 
6392 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6393 {
6394   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6395 
6396   PetscFunctionBegin;
6397   PetscCall(PetscSFDestroy(&coo->sf));
6398   PetscCall(PetscFree(coo->Aperm1));
6399   PetscCall(PetscFree(coo->Bperm1));
6400   PetscCall(PetscFree(coo->Ajmap1));
6401   PetscCall(PetscFree(coo->Bjmap1));
6402   PetscCall(PetscFree(coo->Aimap2));
6403   PetscCall(PetscFree(coo->Bimap2));
6404   PetscCall(PetscFree(coo->Aperm2));
6405   PetscCall(PetscFree(coo->Bperm2));
6406   PetscCall(PetscFree(coo->Ajmap2));
6407   PetscCall(PetscFree(coo->Bjmap2));
6408   PetscCall(PetscFree(coo->Cperm1));
6409   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6410   PetscCall(PetscFree(coo));
6411   PetscFunctionReturn(PETSC_SUCCESS);
6412 }
6413 
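/* MatSetPreallocationCOO_MPIAIJ - the MATMPIAIJ implementation behind the user-facing MatSetPreallocationCOO().

   A hedged usage sketch from the caller's perspective (made-up indices and values; A is a MATMPIAIJ matrix whose
   type and layout have already been set):

     PetscInt    coo_i[] = {0, 0, 3};        // global row indices; repeats and off-process rows are allowed
     PetscInt    coo_j[] = {1, 1, 2};        // global column indices
     PetscScalar v[]     = {1.0, 2.0, 3.0};

     PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // analyze the nonzero pattern once (this routine)
     PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));          // the two (0,1) contributions are summed into one nonzero

   MatSetValuesCOO() may then be called repeatedly with new values for the same pattern.
*/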
6414 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6415 {
6416   MPI_Comm             comm;
6417   PetscMPIInt          rank, size;
6418   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6419   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6420   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6421   PetscContainer       container;
6422   MatCOOStruct_MPIAIJ *coo;
6423 
6424   PetscFunctionBegin;
6425   PetscCall(PetscFree(mpiaij->garray));
6426   PetscCall(VecDestroy(&mpiaij->lvec));
6427 #if defined(PETSC_USE_CTABLE)
6428   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6429 #else
6430   PetscCall(PetscFree(mpiaij->colmap));
6431 #endif
6432   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6433   mat->assembled     = PETSC_FALSE;
6434   mat->was_assembled = PETSC_FALSE;
6435 
6436   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6437   PetscCallMPI(MPI_Comm_size(comm, &size));
6438   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6439   PetscCall(PetscLayoutSetUp(mat->rmap));
6440   PetscCall(PetscLayoutSetUp(mat->cmap));
6441   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6442   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6443   PetscCall(MatGetLocalSize(mat, &m, &n));
6444   PetscCall(MatGetSize(mat, &M, &N));
6445 
6446   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6447   /* entries come first, then local rows, then remote rows.                     */
6448   PetscCount n1 = coo_n, *perm1;
6449   PetscInt  *i1 = coo_i, *j1 = coo_j;
6450 
6451   PetscCall(PetscMalloc1(n1, &perm1));
6452   for (k = 0; k < n1; k++) perm1[k] = k;
6453 
6454   /* Manipulate indices so that entries with negative row or col indices will have smallest
6455      row indices, local entries will have greater but negative row indices, and remote entries
6456      will have positive row indices.
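  /* A concrete illustration of the shift (hypothetical numbers): with rstart = 4 and rend = 8, an entry with a negative
     index, say (i,j) = (-1,3), gets i = PETSC_INT_MIN; a local entry (5,3) gets i = 5 - PETSC_INT_MAX, which is negative
     but recoverable later by adding PETSC_INT_MAX back; a remote entry (9,3) keeps i = 9. Sorting by i therefore places
     ignored entries first, local rows next, and remote rows last. */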
6457   */
6458   for (k = 0; k < n1; k++) {
6459     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6460     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6461     else {
6462       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6463       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6464     }
6465   }
6466 
6467   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6468   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6469 
6470   /* Advance k to the first entry we need to take care of */
6471   for (k = 0; k < n1; k++)
6472     if (i1[k] > PETSC_INT_MIN) break;
6473   PetscCount i1start = k;
6474 
6475   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is the index just past the last local-row entry */
6476   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6477 
6478   /*           Send remote rows to their owner                                  */
6479   /* Find which rows should be sent to which remote ranks*/
6480   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6481   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6482   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6483   const PetscInt *ranges;
6484   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6485 
6486   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6487   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6488   for (k = rem; k < n1;) {
6489     PetscMPIInt owner;
6490     PetscInt    firstRow, lastRow;
6491 
6492     /* Locate a row range */
6493     firstRow = i1[k]; /* first row of this owner */
6494     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6495     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6496 
6497     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6498     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6499 
6500     /* All entries in [k,p) belong to this remote owner */
6501     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6502       PetscMPIInt *sendto2;
6503       PetscInt    *nentries2;
6504       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6505 
6506       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6507       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6508       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6509       PetscCall(PetscFree2(sendto, nentries));
6510       sendto   = sendto2;
6511       nentries = nentries2;
6512       maxNsend = maxNsend2;
6513     }
6514     sendto[nsend] = owner;
6515     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6516     nsend++;
6517     k = p;
6518   }
6519 
6520   /* Build 1st SF to know offsets on remote to send data */
6521   PetscSF      sf1;
6522   PetscInt     nroots = 1, nroots2 = 0;
6523   PetscInt     nleaves = nsend, nleaves2 = 0;
6524   PetscInt    *offsets;
6525   PetscSFNode *iremote;
6526 
6527   PetscCall(PetscSFCreate(comm, &sf1));
6528   PetscCall(PetscMalloc1(nsend, &iremote));
6529   PetscCall(PetscMalloc1(nsend, &offsets));
6530   for (k = 0; k < nsend; k++) {
6531     iremote[k].rank  = sendto[k];
6532     iremote[k].index = 0;
6533     nleaves2 += nentries[k];
6534     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6535   }
6536   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6537   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6538   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we catch it when checking offsets[] below */
6539   PetscCall(PetscSFDestroy(&sf1));
6540   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6541 
6542   /* Build 2nd SF to send remote COOs to their owner */
6543   PetscSF sf2;
6544   nroots  = nroots2;
6545   nleaves = nleaves2;
6546   PetscCall(PetscSFCreate(comm, &sf2));
6547   PetscCall(PetscSFSetFromOptions(sf2));
6548   PetscCall(PetscMalloc1(nleaves, &iremote));
6549   p = 0;
6550   for (k = 0; k < nsend; k++) {
6551     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6552     for (q = 0; q < nentries[k]; q++, p++) {
6553       iremote[p].rank = sendto[k];
6554       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6555     }
6556   }
6557   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6558 
6559   /* Send the remote COOs to their owner */
6560   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6561   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6562   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6563   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6564   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6565   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6566   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6567   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6568   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6569   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6570   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6571 
6572   PetscCall(PetscFree(offsets));
6573   PetscCall(PetscFree2(sendto, nentries));
6574 
6575   /* Sort received COOs by row along with the permutation array     */
6576   for (k = 0; k < n2; k++) perm2[k] = k;
6577   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6578 
6579   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6580   PetscCount *Cperm1;
6581   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6582   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6583   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6584   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6585 
6586   /* Support for HYPRE matrices, kind of a hack.
6587      Swap min column with diagonal so that diagonal values will go first */
6588   PetscBool hypre;
6589   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6590   if (hypre) {
6591     PetscInt *minj;
6592     PetscBT   hasdiag;
6593 
6594     PetscCall(PetscBTCreate(m, &hasdiag));
6595     PetscCall(PetscMalloc1(m, &minj));
6596     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6597     for (k = i1start; k < rem; k++) {
6598       if (j1[k] < cstart || j1[k] >= cend) continue;
6599       const PetscInt rindex = i1[k] - rstart;
6600       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6601       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6602     }
6603     for (k = 0; k < n2; k++) {
6604       if (j2[k] < cstart || j2[k] >= cend) continue;
6605       const PetscInt rindex = i2[k] - rstart;
6606       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6607       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6608     }
6609     for (k = i1start; k < rem; k++) {
6610       const PetscInt rindex = i1[k] - rstart;
6611       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6612       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6613       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6614     }
6615     for (k = 0; k < n2; k++) {
6616       const PetscInt rindex = i2[k] - rstart;
6617       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6618       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6619       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6620     }
6621     PetscCall(PetscBTDestroy(&hasdiag));
6622     PetscCall(PetscFree(minj));
6623   }
6624 
6625   /* Split local COOs and received COOs into diag/offdiag portions */
6626   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6627   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6628   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6629   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6630   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6631   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6632 
6633   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6634   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6635   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6636   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6637 
6638   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6639   PetscInt *Ai, *Bi;
6640   PetscInt *Aj, *Bj;
6641 
6642   PetscCall(PetscMalloc1(m + 1, &Ai));
6643   PetscCall(PetscMalloc1(m + 1, &Bi));
6644   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6645   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6646 
6647   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6648   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6649   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6650   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6651   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6652 
6653   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6654   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6655 
6656   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we      */
6657   /* expect that most nonzeros in A/B have local contributing entries           */
6658   PetscInt    Annz = Ai[m];
6659   PetscInt    Bnnz = Bi[m];
6660   PetscCount *Ajmap1_new, *Bjmap1_new;
6661 
6662   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6663   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6664 
6665   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6666   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6667 
6668   PetscCall(PetscFree(Aimap1));
6669   PetscCall(PetscFree(Ajmap1));
6670   PetscCall(PetscFree(Bimap1));
6671   PetscCall(PetscFree(Bjmap1));
6672   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6673   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6674   PetscCall(PetscFree(perm1));
6675   PetscCall(PetscFree3(i2, j2, perm2));
6676 
6677   Ajmap1 = Ajmap1_new;
6678   Bjmap1 = Bjmap1_new;
6679 
6680   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6681   if (Annz < Annz1 + Annz2) {
6682     PetscInt *Aj_new;
6683     PetscCall(PetscMalloc1(Annz, &Aj_new));
6684     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6685     PetscCall(PetscFree(Aj));
6686     Aj = Aj_new;
6687   }
6688 
6689   if (Bnnz < Bnnz1 + Bnnz2) {
6690     PetscInt *Bj_new;
6691     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6692     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6693     PetscCall(PetscFree(Bj));
6694     Bj = Bj_new;
6695   }
6696 
6697   /* Create new submatrices for on-process and off-process coupling                  */
6698   PetscScalar     *Aa, *Ba;
6699   MatType          rtype;
6700   Mat_SeqAIJ      *a, *b;
6701   PetscObjectState state;
6702   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6703   PetscCall(PetscCalloc1(Bnnz, &Ba));
6704   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6705   if (cstart) {
6706     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6707   }
6708 
6709   PetscCall(MatGetRootType_Private(mat, &rtype));
6710 
6711   MatSeqXAIJGetOptions_Private(mpiaij->A);
6712   PetscCall(MatDestroy(&mpiaij->A));
6713   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6714   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6715   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6716 
6717   MatSeqXAIJGetOptions_Private(mpiaij->B);
6718   PetscCall(MatDestroy(&mpiaij->B));
6719   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6720   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6721   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6722 
6723   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6724   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6725   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6726   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6727 
6728   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6729   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6730   a->free_a  = PETSC_TRUE;
6731   a->free_ij = PETSC_TRUE;
6732   b->free_a  = PETSC_TRUE;
6733   b->free_ij = PETSC_TRUE;
6734   a->maxnz   = a->nz;
6735   b->maxnz   = b->nz;
6736 
6737   /* conversion must happen AFTER multiply setup */
6738   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6739   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6740   PetscCall(VecDestroy(&mpiaij->lvec));
6741   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6742 
6743   // Put the COO struct in a container and then attach that to the matrix
6744   PetscCall(PetscMalloc1(1, &coo));
6745   coo->n       = coo_n;
6746   coo->sf      = sf2;
6747   coo->sendlen = nleaves;
6748   coo->recvlen = nroots;
6749   coo->Annz    = Annz;
6750   coo->Bnnz    = Bnnz;
6751   coo->Annz2   = Annz2;
6752   coo->Bnnz2   = Bnnz2;
6753   coo->Atot1   = Atot1;
6754   coo->Atot2   = Atot2;
6755   coo->Btot1   = Btot1;
6756   coo->Btot2   = Btot2;
6757   coo->Ajmap1  = Ajmap1;
6758   coo->Aperm1  = Aperm1;
6759   coo->Bjmap1  = Bjmap1;
6760   coo->Bperm1  = Bperm1;
6761   coo->Aimap2  = Aimap2;
6762   coo->Ajmap2  = Ajmap2;
6763   coo->Aperm2  = Aperm2;
6764   coo->Bimap2  = Bimap2;
6765   coo->Bjmap2  = Bjmap2;
6766   coo->Bperm2  = Bperm2;
6767   coo->Cperm1  = Cperm1;
6768   // Allocate the send/recv buffers during preallocation. If not used, they have zero cost on the host
6769   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6770   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6771   PetscCall(PetscContainerSetPointer(container, coo));
6772   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6773   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6774   PetscCall(PetscContainerDestroy(&container));
6775   PetscFunctionReturn(PETSC_SUCCESS);
6776 }
6777 
6778 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6779 {
6780   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6781   Mat                  A = mpiaij->A, B = mpiaij->B;
6782   PetscScalar         *Aa, *Ba;
6783   PetscScalar         *sendbuf, *recvbuf;
6784   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6785   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6786   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6787   const PetscCount    *Cperm1;
6788   PetscContainer       container;
6789   MatCOOStruct_MPIAIJ *coo;
6790 
6791   PetscFunctionBegin;
6792   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6793   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6794   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6795   sendbuf = coo->sendbuf;
6796   recvbuf = coo->recvbuf;
6797   Ajmap1  = coo->Ajmap1;
6798   Ajmap2  = coo->Ajmap2;
6799   Aimap2  = coo->Aimap2;
6800   Bjmap1  = coo->Bjmap1;
6801   Bjmap2  = coo->Bjmap2;
6802   Bimap2  = coo->Bimap2;
6803   Aperm1  = coo->Aperm1;
6804   Aperm2  = coo->Aperm2;
6805   Bperm1  = coo->Bperm1;
6806   Bperm2  = coo->Bperm2;
6807   Cperm1  = coo->Cperm1;
6808 
6809   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6810   PetscCall(MatSeqAIJGetArray(B, &Ba));
6811 
6812   /* Pack entries to be sent to remote */
6813   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6814 
6815   /* Send remote entries to their owner and overlap the communication with local computation */
6816   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6817   /* Add local entries to A and B */
6818   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6819     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6820     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6821     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6822   }
6823   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6824     PetscScalar sum = 0.0;
6825     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6826     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6827   }
6828   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6829 
6830   /* Add received remote entries to A and B */
6831   for (PetscCount i = 0; i < coo->Annz2; i++) {
6832     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6833   }
6834   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6835     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6836   }
6837   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6838   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6839   PetscFunctionReturn(PETSC_SUCCESS);
6840 }
6841 
6842 /*MC
6843    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6844 
6845    Options Database Keys:
6846 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6847 
6848    Level: beginner
6849 
6850    Notes:
6851    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6852     in this case the values associated with the rows and columns one passes in are set to zero
6853     in the matrix.
6854 
6855     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6856     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6857 
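   Example Usage:
   A minimal sketch (assuming `comm`, the sizes `m`, `n`, `M`, `N`, and the surrounding PETSc setup already exist):
.vb
   Mat A;

   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, m, n, M, N));
   PetscCall(MatSetType(A, MATMPIAIJ)); /* or -mat_type mpiaij together with MatSetFromOptions() */
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); /* rough per-row estimates for the diagonal and off-diagonal blocks */
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
   PetscCall(MatDestroy(&A));
.ve
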
6858 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6859 M*/
6860 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6861 {
6862   Mat_MPIAIJ *b;
6863   PetscMPIInt size;
6864 
6865   PetscFunctionBegin;
6866   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6867 
6868   PetscCall(PetscNew(&b));
6869   B->data       = (void *)b;
6870   B->ops[0]     = MatOps_Values;
6871   B->assembled  = PETSC_FALSE;
6872   B->insertmode = NOT_SET_VALUES;
6873   b->size       = size;
6874 
6875   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6876 
6877   /* build cache for off-process entries formed during assembly */
6878   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6879 
6880   b->donotstash  = PETSC_FALSE;
6881   b->colmap      = NULL;
6882   b->garray      = NULL;
6883   b->roworiented = PETSC_TRUE;
6884 
6885   /* stuff used for matrix vector multiply */
6886   b->lvec  = NULL;
6887   b->Mvctx = NULL;
6888 
6889   /* stuff for MatGetRow() */
6890   b->rowindices   = NULL;
6891   b->rowvalues    = NULL;
6892   b->getrowactive = PETSC_FALSE;
6893 
6894   /* flexible pointer used in CUSPARSE classes */
6895   b->spptr = NULL;
6896 
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6907 #if defined(PETSC_HAVE_CUDA)
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6909 #endif
6910 #if defined(PETSC_HAVE_HIP)
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6912 #endif
6913 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6915 #endif
6916 #if defined(PETSC_HAVE_MKL_SPARSE)
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6918 #endif
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6923 #if defined(PETSC_HAVE_ELEMENTAL)
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6925 #endif
6926 #if defined(PETSC_HAVE_SCALAPACK)
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6928 #endif
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6931 #if defined(PETSC_HAVE_HYPRE)
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6934 #endif
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6939   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6940   PetscFunctionReturn(PETSC_SUCCESS);
6941 }
6942 
6943 /*@
6944   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6945   and "off-diagonal" part of the matrix in CSR format.
6946 
6947   Collective
6948 
6949   Input Parameters:
6950 + comm - MPI communicator
6951 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6952 . n    - This value should be the same as the local size used in creating the
6953          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6954          calculated if `N` is given). For square matrices `n` is almost always `m`.
6955 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6956 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6957 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6958 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6959 . a    - matrix values
6960 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6961 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6962 - oa   - matrix values
6963 
6964   Output Parameter:
6965 . mat - the matrix
6966 
6967   Level: advanced
6968 
6969   Notes:
6970   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6971   must free the arrays once the matrix has been destroyed and not before.
6972 
6973   The `i` and `j` indices are 0 based
6974 
6975   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6976 
6977   This sets local rows and cannot be used to set off-processor values.
6978 
6979   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6980   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6981   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6982   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6983   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6984   communication if it is known that only local entries will be set.
6985 
6986 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
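  Example:
  A sketch (hypothetical numbers) of what rank 0 would pass for a 4 x 4 matrix distributed over two ranks, with two rows
  and two "diagonal" columns per rank, when rank 0 owns the entries (0,0)=1.0, (0,2)=2.0, (1,1)=3.0 and (1,3)=4.0
.vb
  PetscInt    i[]  = {0, 1, 2}, j[]  = {0, 1}; /* diagonal block in CSR form, local column indices      */
  PetscScalar a[]  = {1.0, 3.0};
  PetscInt    oi[] = {0, 1, 2}, oj[] = {2, 3}; /* off-diagonal block in CSR form, global column indices */
  PetscScalar oa[] = {2.0, 4.0};

  PetscCall(MatCreateMPIAIJWithSplitArrays(comm, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
.ve
  As noted above, the arrays are not copied, so they must remain valid until the matrix is destroyed.
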
6987           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6988 @*/
6989 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6990 {
6991   Mat_MPIAIJ *maij;
6992 
6993   PetscFunctionBegin;
6994   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6995   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6996   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6997   PetscCall(MatCreate(comm, mat));
6998   PetscCall(MatSetSizes(*mat, m, n, M, N));
6999   PetscCall(MatSetType(*mat, MATMPIAIJ));
7000   maij = (Mat_MPIAIJ *)(*mat)->data;
7001 
7002   (*mat)->preallocated = PETSC_TRUE;
7003 
7004   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7005   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7006 
7007   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7008   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7009 
7010   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7011   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7012   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7013   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7014   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7015   PetscFunctionReturn(PETSC_SUCCESS);
7016 }
7017 
7018 typedef struct {
7019   Mat       *mp;    /* intermediate products */
7020   PetscBool *mptmp; /* is the intermediate product temporary ? */
7021   PetscInt   cp;    /* number of intermediate products */
7022 
7023   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7024   PetscInt    *startsj_s, *startsj_r;
7025   PetscScalar *bufa;
7026   Mat          P_oth;
7027 
7028   /* may take advantage of merging product->B */
7029   Mat Bloc; /* B-local by merging diag and off-diag */
7030 
7031   /* cusparse does not support splitting between symbolic and numeric phases.
7032      When api_user is true, we don't need to update the numerical values
7033      of the temporary storage */
7034   PetscBool reusesym;
7035 
7036   /* support for COO values insertion */
7037   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7038   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7039   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7040   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7041   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7042   PetscMemType mtype;
7043 
7044   /* customization */
7045   PetscBool abmerge;
7046   PetscBool P_oth_bind;
7047 } MatMatMPIAIJBACKEND;
7048 
7049 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7050 {
7051   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7052   PetscInt             i;
7053 
7054   PetscFunctionBegin;
7055   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7056   PetscCall(PetscFree(mmdata->bufa));
7057   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7058   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7059   PetscCall(MatDestroy(&mmdata->P_oth));
7060   PetscCall(MatDestroy(&mmdata->Bloc));
7061   PetscCall(PetscSFDestroy(&mmdata->sf));
7062   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7063   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7064   PetscCall(PetscFree(mmdata->own[0]));
7065   PetscCall(PetscFree(mmdata->own));
7066   PetscCall(PetscFree(mmdata->off[0]));
7067   PetscCall(PetscFree(mmdata->off));
7068   PetscCall(PetscFree(mmdata));
7069   PetscFunctionReturn(PETSC_SUCCESS);
7070 }
7071 
7072 /* Copy n selected entries of A, with indices given in idx[], to v[].
7073    If idx is NULL, copy the whole data array of A to v[].
7074  */
7075 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7076 {
7077   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7078 
7079   PetscFunctionBegin;
7080   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7081   if (f) {
7082     PetscCall((*f)(A, n, idx, v));
7083   } else {
7084     const PetscScalar *vv;
7085 
7086     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7087     if (n && idx) {
7088       PetscScalar    *w  = v;
7089       const PetscInt *oi = idx;
7090       PetscInt        j;
7091 
7092       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7093     } else {
7094       PetscCall(PetscArraycpy(v, vv, n));
7095     }
7096     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7097   }
7098   PetscFunctionReturn(PETSC_SUCCESS);
7099 }
7100 
7101 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7102 {
7103   MatMatMPIAIJBACKEND *mmdata;
7104   PetscInt             i, n_d, n_o;
7105 
7106   PetscFunctionBegin;
7107   MatCheckProduct(C, 1);
7108   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7109   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7110   if (!mmdata->reusesym) { /* update temporary matrices */
7111     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7112     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7113   }
7114   mmdata->reusesym = PETSC_FALSE;
7115 
7116   for (i = 0; i < mmdata->cp; i++) {
7117     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7118     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7119   }
7120   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7121     PetscInt noff;
7122 
7123     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7124     if (mmdata->mptmp[i]) continue;
7125     if (noff) {
7126       PetscInt nown;
7127 
7128       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7129       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7130       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7131       n_o += noff;
7132       n_d += nown;
7133     } else {
7134       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7135 
7136       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7137       n_d += mm->nz;
7138     }
7139   }
7140   if (mmdata->hasoffproc) { /* offprocess insertion */
7141     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7142     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7143   }
7144   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7145   PetscFunctionReturn(PETSC_SUCCESS);
7146 }
7147 
7148 /* Support for Pt * A, A * P, or Pt * A * P */
7149 #define MAX_NUMBER_INTERMEDIATE 4
7150 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7151 {
7152   Mat_Product           *product = C->product;
7153   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7154   Mat_MPIAIJ            *a, *p;
7155   MatMatMPIAIJBACKEND   *mmdata;
7156   ISLocalToGlobalMapping P_oth_l2g = NULL;
7157   IS                     glob      = NULL;
7158   const char            *prefix;
7159   char                   pprefix[256];
7160   const PetscInt        *globidx, *P_oth_idx;
7161   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7162   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7163   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7164                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7165                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7166   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7167 
7168   MatProductType ptype;
7169   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7170   PetscMPIInt    size;
7171 
7172   PetscFunctionBegin;
7173   MatCheckProduct(C, 1);
7174   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7175   ptype = product->type;
7176   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7177     ptype                                          = MATPRODUCT_AB;
7178     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7179   }
7180   switch (ptype) {
7181   case MATPRODUCT_AB:
7182     A          = product->A;
7183     P          = product->B;
7184     m          = A->rmap->n;
7185     n          = P->cmap->n;
7186     M          = A->rmap->N;
7187     N          = P->cmap->N;
7188     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7189     break;
7190   case MATPRODUCT_AtB:
7191     P          = product->A;
7192     A          = product->B;
7193     m          = P->cmap->n;
7194     n          = A->cmap->n;
7195     M          = P->cmap->N;
7196     N          = A->cmap->N;
7197     hasoffproc = PETSC_TRUE;
7198     break;
7199   case MATPRODUCT_PtAP:
7200     A          = product->A;
7201     P          = product->B;
7202     m          = P->cmap->n;
7203     n          = P->cmap->n;
7204     M          = P->cmap->N;
7205     N          = P->cmap->N;
7206     hasoffproc = PETSC_TRUE;
7207     break;
7208   default:
7209     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7210   }
7211   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7212   if (size == 1) hasoffproc = PETSC_FALSE;
7213 
7214   /* defaults */
7215   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7216     mp[i]    = NULL;
7217     mptmp[i] = PETSC_FALSE;
7218     rmapt[i] = -1;
7219     cmapt[i] = -1;
7220     rmapa[i] = NULL;
7221     cmapa[i] = NULL;
7222   }
7223 
7224   /* customization */
7225   PetscCall(PetscNew(&mmdata));
7226   mmdata->reusesym = product->api_user;
7227   if (ptype == MATPRODUCT_AB) {
7228     if (product->api_user) {
7229       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7230       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7231       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7232       PetscOptionsEnd();
7233     } else {
7234       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7235       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7236       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7237       PetscOptionsEnd();
7238     }
7239   } else if (ptype == MATPRODUCT_PtAP) {
7240     if (product->api_user) {
7241       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7242       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7243       PetscOptionsEnd();
7244     } else {
7245       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7246       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7247       PetscOptionsEnd();
7248     }
7249   }
7250   a = (Mat_MPIAIJ *)A->data;
7251   p = (Mat_MPIAIJ *)P->data;
7252   PetscCall(MatSetSizes(C, m, n, M, N));
7253   PetscCall(PetscLayoutSetUp(C->rmap));
7254   PetscCall(PetscLayoutSetUp(C->cmap));
7255   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7256   PetscCall(MatGetOptionsPrefix(C, &prefix));
7257 
7258   cp = 0;
7259   switch (ptype) {
7260   case MATPRODUCT_AB: /* A * P */
7261     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7262 
7263     /* A_diag * P_local (merged or not) */
7264     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7265       /* P is product->B */
7266       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7267       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7268       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7269       PetscCall(MatProductSetFill(mp[cp], product->fill));
7270       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7271       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7272       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7273       mp[cp]->product->api_user = product->api_user;
7274       PetscCall(MatProductSetFromOptions(mp[cp]));
7275       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7276       PetscCall(ISGetIndices(glob, &globidx));
7277       rmapt[cp] = 1;
7278       cmapt[cp] = 2;
7279       cmapa[cp] = globidx;
7280       mptmp[cp] = PETSC_FALSE;
7281       cp++;
7282     } else { /* A_diag * P_diag and A_diag * P_off */
7283       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7284       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7285       PetscCall(MatProductSetFill(mp[cp], product->fill));
7286       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7287       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7288       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7289       mp[cp]->product->api_user = product->api_user;
7290       PetscCall(MatProductSetFromOptions(mp[cp]));
7291       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7292       rmapt[cp] = 1;
7293       cmapt[cp] = 1;
7294       mptmp[cp] = PETSC_FALSE;
7295       cp++;
7296       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7297       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7298       PetscCall(MatProductSetFill(mp[cp], product->fill));
7299       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7300       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7301       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7302       mp[cp]->product->api_user = product->api_user;
7303       PetscCall(MatProductSetFromOptions(mp[cp]));
7304       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7305       rmapt[cp] = 1;
7306       cmapt[cp] = 2;
7307       cmapa[cp] = p->garray;
7308       mptmp[cp] = PETSC_FALSE;
7309       cp++;
7310     }
7311 
7312     /* A_off * P_other */
7313     if (mmdata->P_oth) {
7314       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7315       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7316       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7317       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7318       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7319       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7320       PetscCall(MatProductSetFill(mp[cp], product->fill));
7321       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7322       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7323       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7324       mp[cp]->product->api_user = product->api_user;
7325       PetscCall(MatProductSetFromOptions(mp[cp]));
7326       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7327       rmapt[cp] = 1;
7328       cmapt[cp] = 2;
7329       cmapa[cp] = P_oth_idx;
7330       mptmp[cp] = PETSC_FALSE;
7331       cp++;
7332     }
7333     break;
7334 
7335   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7336     /* A is product->B */
7337     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7338     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7339       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7340       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7341       PetscCall(MatProductSetFill(mp[cp], product->fill));
7342       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7343       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7344       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7345       mp[cp]->product->api_user = product->api_user;
7346       PetscCall(MatProductSetFromOptions(mp[cp]));
7347       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7348       PetscCall(ISGetIndices(glob, &globidx));
7349       rmapt[cp] = 2;
7350       rmapa[cp] = globidx;
7351       cmapt[cp] = 2;
7352       cmapa[cp] = globidx;
7353       mptmp[cp] = PETSC_FALSE;
7354       cp++;
7355     } else {
7356       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       PetscCall(ISGetIndices(glob, &globidx));
7366       rmapt[cp] = 1;
7367       cmapt[cp] = 2;
7368       cmapa[cp] = globidx;
7369       mptmp[cp] = PETSC_FALSE;
7370       cp++;
7371       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7372       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7373       PetscCall(MatProductSetFill(mp[cp], product->fill));
7374       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7375       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7376       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7377       mp[cp]->product->api_user = product->api_user;
7378       PetscCall(MatProductSetFromOptions(mp[cp]));
7379       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7380       rmapt[cp] = 2;
7381       rmapa[cp] = p->garray;
7382       cmapt[cp] = 2;
7383       cmapa[cp] = globidx;
7384       mptmp[cp] = PETSC_FALSE;
7385       cp++;
7386     }
7387     break;
7388   case MATPRODUCT_PtAP:
7389     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7390     /* P is product->B */
7391     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7392     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7393     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7394     PetscCall(MatProductSetFill(mp[cp], product->fill));
7395     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7396     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7397     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7398     mp[cp]->product->api_user = product->api_user;
7399     PetscCall(MatProductSetFromOptions(mp[cp]));
7400     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7401     PetscCall(ISGetIndices(glob, &globidx));
7402     rmapt[cp] = 2;
7403     rmapa[cp] = globidx;
7404     cmapt[cp] = 2;
7405     cmapa[cp] = globidx;
7406     mptmp[cp] = PETSC_FALSE;
7407     cp++;
7408     if (mmdata->P_oth) {
7409       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7410       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7411       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7412       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7413       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7414       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7415       PetscCall(MatProductSetFill(mp[cp], product->fill));
7416       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7417       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7418       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7419       mp[cp]->product->api_user = product->api_user;
7420       PetscCall(MatProductSetFromOptions(mp[cp]));
7421       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7422       mptmp[cp] = PETSC_TRUE;
7423       cp++;
7424       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7425       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7426       PetscCall(MatProductSetFill(mp[cp], product->fill));
7427       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7428       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7429       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7430       mp[cp]->product->api_user = product->api_user;
7431       PetscCall(MatProductSetFromOptions(mp[cp]));
7432       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7433       rmapt[cp] = 2;
7434       rmapa[cp] = globidx;
7435       cmapt[cp] = 2;
7436       cmapa[cp] = P_oth_idx;
7437       mptmp[cp] = PETSC_FALSE;
7438       cp++;
7439     }
7440     break;
7441   default:
7442     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7443   }
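  /* Summary of the per-product bookkeeping filled above, consumed when assembling the COO data below:
       rmapt[i] = 1 : local rows of mp[i] are consecutive global rows of C (offset by C's row start)
       rmapt[i] = 2 : local rows of mp[i] map to global rows of C through rmapa[i]
       cmapt[i] = 0 : column indices of mp[i] are already global
       cmapt[i] = 1 : local columns of mp[i] are consecutive owned columns of C (offset by C's column start)
       cmapt[i] = 2 : local columns of mp[i] map to global columns of C through cmapa[i]
       mptmp[i]     : mp[i] is only an operand of a later product and contributes no entries of C directly */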
7444   /* sanity check */
7445   if (size > 1)
7446     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7447 
7448   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7449   for (i = 0; i < cp; i++) {
7450     mmdata->mp[i]    = mp[i];
7451     mmdata->mptmp[i] = mptmp[i];
7452   }
7453   mmdata->cp             = cp;
7454   C->product->data       = mmdata;
7455   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7456   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7457 
7458   /* memory type */
7459   mmdata->mtype = PETSC_MEMTYPE_HOST;
7460   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7461   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7462   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7463   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7464   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7465   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7466 
7467   /* prepare coo coordinates for values insertion */
7468 
7469   /* count total nonzeros of those intermediate seqaij Mats
7470     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7471     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7472     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7473   */
7474   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7475     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7476     if (mptmp[cp]) continue;
7477     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7478       const PetscInt *rmap = rmapa[cp];
7479       const PetscInt  mr   = mp[cp]->rmap->n;
7480       const PetscInt  rs   = C->rmap->rstart;
7481       const PetscInt  re   = C->rmap->rend;
7482       const PetscInt *ii   = mm->i;
7483       for (i = 0; i < mr; i++) {
7484         const PetscInt gr = rmap[i];
7485         const PetscInt nz = ii[i + 1] - ii[i];
7486         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7487         else ncoo_oown += nz;                  /* this row is local */
7488       }
7489     } else ncoo_d += mm->nz;
7490   }
7491 
7492   /*
7493     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7494 
7495     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this process by other procs.
7496 
7497     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7498 
7499     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
7500     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7501     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
7502 
7503     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7504     E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros, and the remaining part stores the row indices of nonzeros this process will receive.
7505   */
7506   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7507   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7508 
7509   /* gather (i,j) of nonzeros inserted by remote procs */
7510   if (hasoffproc) {
7511     PetscSF  msf;
7512     PetscInt ncoo2, *coo_i2, *coo_j2;
7513 
7514     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7515     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7516     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7517 
7518     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7519       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7520       PetscInt   *idxoff = mmdata->off[cp];
7521       PetscInt   *idxown = mmdata->own[cp];
7522       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7523         const PetscInt *rmap = rmapa[cp];
7524         const PetscInt *cmap = cmapa[cp];
7525         const PetscInt *ii   = mm->i;
7526         PetscInt       *coi  = coo_i + ncoo_o;
7527         PetscInt       *coj  = coo_j + ncoo_o;
7528         const PetscInt  mr   = mp[cp]->rmap->n;
7529         const PetscInt  rs   = C->rmap->rstart;
7530         const PetscInt  re   = C->rmap->rend;
7531         const PetscInt  cs   = C->cmap->rstart;
7532         for (i = 0; i < mr; i++) {
7533           const PetscInt *jj = mm->j + ii[i];
7534           const PetscInt  gr = rmap[i];
7535           const PetscInt  nz = ii[i + 1] - ii[i];
7536           if (gr < rs || gr >= re) { /* this is an offproc row */
7537             for (j = ii[i]; j < ii[i + 1]; j++) {
7538               *coi++    = gr;
7539               *idxoff++ = j;
7540             }
7541             if (!cmapt[cp]) { /* already global */
7542               for (j = 0; j < nz; j++) *coj++ = jj[j];
7543             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7544               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7545             } else { /* offdiag */
7546               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7547             }
7548             ncoo_o += nz;
7549           } else { /* this is a local row */
7550             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7551           }
7552         }
7553       }
7554       mmdata->off[cp + 1] = idxoff;
7555       mmdata->own[cp + 1] = idxown;
7556     }
7557 
7558     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7559     PetscInt incoo_o;
7560     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7561     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7562     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7563     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7564     ncoo = ncoo_d + ncoo_oown + ncoo2;
7565     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7566     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7567     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7568     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7569     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7570     PetscCall(PetscFree2(coo_i, coo_j));
7571     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7572     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7573     coo_i = coo_i2;
7574     coo_j = coo_j2;
7575   } else { /* no offproc values insertion */
7576     ncoo = ncoo_d;
7577     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7578 
7579     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7580     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7581     PetscCall(PetscSFSetUp(mmdata->sf));
7582   }
7583   mmdata->hasoffproc = hasoffproc;
7584 
7585   /* gather (i,j) of nonzeros inserted locally */
7586   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7587     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7588     PetscInt       *coi  = coo_i + ncoo_d;
7589     PetscInt       *coj  = coo_j + ncoo_d;
7590     const PetscInt *jj   = mm->j;
7591     const PetscInt *ii   = mm->i;
7592     const PetscInt *cmap = cmapa[cp];
7593     const PetscInt *rmap = rmapa[cp];
7594     const PetscInt  mr   = mp[cp]->rmap->n;
7595     const PetscInt  rs   = C->rmap->rstart;
7596     const PetscInt  re   = C->rmap->rend;
7597     const PetscInt  cs   = C->cmap->rstart;
7598 
7599     if (mptmp[cp]) continue;
7600     if (rmapt[cp] == 1) { /* consecutive rows */
7601       /* fill coo_i */
7602       for (i = 0; i < mr; i++) {
7603         const PetscInt gr = i + rs;
7604         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7605       }
7606       /* fill coo_j */
7607       if (!cmapt[cp]) { /* type-0, already global */
7608         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7609       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7610         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7611       } else {                                            /* type-2, local to global for sparse columns */
7612         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7613       }
7614       ncoo_d += mm->nz;
7615     } else if (rmapt[cp] == 2) { /* sparse rows */
7616       for (i = 0; i < mr; i++) {
7617         const PetscInt *jj = mm->j + ii[i];
7618         const PetscInt  gr = rmap[i];
7619         const PetscInt  nz = ii[i + 1] - ii[i];
7620         if (gr >= rs && gr < re) { /* local rows */
7621           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7622           if (!cmapt[cp]) { /* type-0, already global */
7623             for (j = 0; j < nz; j++) *coj++ = jj[j];
7624           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7625             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7626           } else { /* type-2, local to global for sparse columns */
7627             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7628           }
7629           ncoo_d += nz;
7630         }
7631       }
7632     }
7633   }
7634   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7635   PetscCall(ISDestroy(&glob));
7636   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7637   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7638   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7639   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7640 
7641   /* preallocate with COO data */
7642   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7643   PetscCall(PetscFree2(coo_i, coo_j));
7644   PetscFunctionReturn(PETSC_SUCCESS);
7645 }
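
/*
  Usage sketch (illustrative only; A, P, and C are placeholders for user matrices): the symbolic routine above is
  reached through the generic MatProduct interface when the operands share a matching MPIAIJ (device) subtype, e.g.

    Mat C;
    PetscCall(MatProductCreate(A, P, NULL, &C));
    PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
    PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND
    PetscCall(MatProductSymbolic(C));       // creates the intermediate products and the COO preallocation of C
    PetscCall(MatProductNumeric(C));        // computes the values and inserts them through the COO path
    PetscCall(MatDestroy(&C));
*/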
7646 
7647 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7648 {
7649   Mat_Product *product = mat->product;
7650 #if defined(PETSC_HAVE_DEVICE)
7651   PetscBool match  = PETSC_FALSE;
7652   PetscBool usecpu = PETSC_FALSE;
7653 #else
7654   PetscBool match = PETSC_TRUE;
7655 #endif
7656 
7657   PetscFunctionBegin;
7658   MatCheckProduct(mat, 1);
7659 #if defined(PETSC_HAVE_DEVICE)
7660   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7661   if (match) { /* we can always fallback to the CPU if requested */
7662     switch (product->type) {
7663     case MATPRODUCT_AB:
7664       if (product->api_user) {
7665         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7666         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7667         PetscOptionsEnd();
7668       } else {
7669         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7670         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7671         PetscOptionsEnd();
7672       }
7673       break;
7674     case MATPRODUCT_AtB:
7675       if (product->api_user) {
7676         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7677         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7678         PetscOptionsEnd();
7679       } else {
7680         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7681         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7682         PetscOptionsEnd();
7683       }
7684       break;
7685     case MATPRODUCT_PtAP:
7686       if (product->api_user) {
7687         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7688         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7689         PetscOptionsEnd();
7690       } else {
7691         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7692         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7693         PetscOptionsEnd();
7694       }
7695       break;
7696     default:
7697       break;
7698     }
7699     match = (PetscBool)!usecpu;
7700   }
7701 #endif
7702   if (match) {
7703     switch (product->type) {
7704     case MATPRODUCT_AB:
7705     case MATPRODUCT_AtB:
7706     case MATPRODUCT_PtAP:
7707       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7708       break;
7709     default:
7710       break;
7711     }
7712   }
7713   /* fallback to MPIAIJ ops */
7714   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7715   PetscFunctionReturn(PETSC_SUCCESS);
7716 }
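
/*
  Runtime options handled above (the names on the left apply when the product was requested through the
  MatMatMult()/MatTransposeMatMult()/MatPtAP() API, i.e. when api_user is set); setting any of them to true
  falls back to the MPIAIJ CPU implementation:

    -matmatmult_backend_cpu          or -mat_product_algorithm_backend_cpu   (MATPRODUCT_AB)
    -mattransposematmult_backend_cpu or -mat_product_algorithm_backend_cpu   (MATPRODUCT_AtB)
    -matptap_backend_cpu             or -mat_product_algorithm_backend_cpu   (MATPRODUCT_PtAP)
*/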
7717 
7718 /*
7719    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7720 
7721    n - the number of block indices in cc[]
7722    cc - the block indices (must be large enough to contain the indices)
7723 */
7724 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7725 {
7726   PetscInt        cnt = -1, nidx, j;
7727   const PetscInt *idx;
7728 
7729   PetscFunctionBegin;
7730   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7731   if (nidx) {
7732     cnt     = 0;
7733     cc[cnt] = idx[0] / bs;
7734     for (j = 1; j < nidx; j++) {
7735       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7736     }
7737   }
7738   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7739   *n = cnt + 1;
7740   PetscFunctionReturn(PETSC_SUCCESS);
7741 }
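
/*
  Example (illustrative): with bs = 3 and a row whose column indices are {0, 1, 2, 9, 10, 11}, MatCollapseRow()
  returns n = 2 and cc[] = {0, 3}, i.e. the row touches block columns 0 and 3. The column indices obtained from
  MatGetRow() are assumed to be sorted, as is the case for AIJ matrices.
*/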
7742 
7743 /*
7744     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7745 
7746     ncollapsed - the number of block indices
7747     collapsed - the block indices (must be large enough to contain the indices)
7748 */
7749 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7750 {
7751   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7752 
7753   PetscFunctionBegin;
7754   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7755   for (i = start + 1; i < start + bs; i++) {
7756     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7757     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7758     cprevtmp = cprev;
7759     cprev    = merged;
7760     merged   = cprevtmp;
7761   }
7762   *ncollapsed = nprev;
7763   if (collapsed) *collapsed = cprev;
7764   PetscFunctionReturn(PETSC_SUCCESS);
7765 }
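
/*
  Example (illustrative): with bs = 2, if rows start and start+1 collapse to block columns {0, 3} and {1, 3}
  respectively, MatCollapseRows() returns ncollapsed = 3 and collapsed[] = {0, 1, 3} (the sorted union).
  The returned pointer aliases one of the caller-provided work arrays w0/w2.
*/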
7766 
7767 /*
7768  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7769 
7770  Input Parameters:
7771 + Amat - matrix
7772 . symmetrize - make the result symmetric
7773 - scale - scale with diagonal
7774 
7775  Output Parameter:
7776  . a_Gmat - output scalar graph >= 0
7777 
7778 */
7779 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7780 {
7781   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7782   MPI_Comm  comm;
7783   Mat       Gmat;
7784   PetscBool ismpiaij, isseqaij;
7785   Mat       a, b, c;
7786   MatType   jtype;
7787 
7788   PetscFunctionBegin;
7789   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7790   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7791   PetscCall(MatGetSize(Amat, &MM, &NN));
7792   PetscCall(MatGetBlockSize(Amat, &bs));
7793   nloc = (Iend - Istart) / bs;
7794 
7795   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7796   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7797   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7798 
7799   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7800   /* One solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can provide a fast
7801      implementation */
7802   if (bs > 1) {
7803     PetscCall(MatGetType(Amat, &jtype));
7804     PetscCall(MatCreate(comm, &Gmat));
7805     PetscCall(MatSetType(Gmat, jtype));
7806     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7807     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7808     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7809       PetscInt  *d_nnz, *o_nnz;
7810       MatScalar *aa, val, *AA;
7811       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7812 
7813       if (isseqaij) {
7814         a = Amat;
7815         b = NULL;
7816       } else {
7817         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7818         a             = d->A;
7819         b             = d->B;
7820       }
7821       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7822       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7823       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7824         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7825         const PetscInt *cols1, *cols2;
7826 
7827         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7828           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7829           nnz[brow / bs] = nc2 / bs;
7830           if (nc2 % bs) ok = 0;
7831           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7832           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7833             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7834             if (nc1 != nc2) ok = 0;
7835             else {
7836               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7837                 if (cols1[jj] != cols2[jj]) ok = 0;
7838                 if (cols1[jj] % bs != jj % bs) ok = 0;
7839               }
7840             }
7841             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7842           }
7843           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7844           if (!ok) {
7845             PetscCall(PetscFree2(d_nnz, o_nnz));
7846             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7847             goto old_bs;
7848           }
7849         }
7850       }
7851       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7852       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7853       PetscCall(PetscFree2(d_nnz, o_nnz));
7854       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7855       // diag
7856       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7857         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7858 
7859         ai = aseq->i;
7860         n  = ai[brow + 1] - ai[brow];
7861         aj = aseq->j + ai[brow];
7862         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7863           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7864           val        = 0;
7865           if (index_size == 0) {
7866             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7867               aa = aseq->a + ai[brow + ii] + k;
7868               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7869                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7870               }
7871             }
7872           } else {                                            // use (index,index) value if provided
7873             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7874               PetscInt ii = index[iii];
7875               aa          = aseq->a + ai[brow + ii] + k;
7876               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7877                 PetscInt jj = index[jjj];
7878                 val += PetscAbs(PetscRealPart(aa[jj]));
7879               }
7880             }
7881           }
7882           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7883           AA[k / bs] = val;
7884         }
7885         grow = Istart / bs + brow / bs;
7886         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7887       }
7888       // off-diag
7889       if (ismpiaij) {
7890         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7891         const PetscScalar *vals;
7892         const PetscInt    *cols, *garray = aij->garray;
7893 
7894         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7895         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7896           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7897           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7898             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7899             AA[k / bs] = 0;
7900             AJ[cidx]   = garray[cols[k]] / bs;
7901           }
7902           nc = ncols / bs;
7903           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7904           if (index_size == 0) {
7905             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7906               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7907               for (PetscInt k = 0; k < ncols; k += bs) {
7908                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7909                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7910                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7911                 }
7912               }
7913               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7914             }
7915           } else {                                            // use (index,index) value if provided
7916             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7917               PetscInt ii = index[iii];
7918               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7919               for (PetscInt k = 0; k < ncols; k += bs) {
7920                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7921                   PetscInt jj = index[jjj];
7922                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7923                 }
7924               }
7925               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7926             }
7927           }
7928           grow = Istart / bs + brow / bs;
7929           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7930         }
7931       }
7932       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7933       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7934       PetscCall(PetscFree2(AA, AJ));
7935     } else {
7936       const PetscScalar *vals;
7937       const PetscInt    *idx;
7938       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7939     old_bs:
7940       /*
7941        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7942        */
7943       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7944       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7945       if (isseqaij) {
7946         PetscInt max_d_nnz;
7947 
7948         /*
7949          Determine exact preallocation count for (sequential) scalar matrix
7950          */
7951         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7952         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7953         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7954         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7955         PetscCall(PetscFree3(w0, w1, w2));
7956       } else if (ismpiaij) {
7957         Mat             Daij, Oaij;
7958         const PetscInt *garray;
7959         PetscInt        max_d_nnz;
7960 
7961         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7962         /*
7963          Determine exact preallocation count for diagonal block portion of scalar matrix
7964          */
7965         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7966         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7967         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7968         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7969         PetscCall(PetscFree3(w0, w1, w2));
7970         /*
7971          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
7972          */
7973         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7974           o_nnz[jj] = 0;
7975           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7976             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7977             o_nnz[jj] += ncols;
7978             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7979           }
7980           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7981         }
7982       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7983       /* get scalar copy (norms) of matrix */
7984       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7985       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7986       PetscCall(PetscFree2(d_nnz, o_nnz));
7987       for (Ii = Istart; Ii < Iend; Ii++) {
7988         PetscInt dest_row = Ii / bs;
7989 
7990         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7991         for (jj = 0; jj < ncols; jj++) {
7992           PetscInt    dest_col = idx[jj] / bs;
7993           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7994 
7995           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7996         }
7997         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7998       }
7999       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8000       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8001     }
8002   } else {
8003     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8004     else {
8005       Gmat = Amat;
8006       PetscCall(PetscObjectReference((PetscObject)Gmat));
8007     }
8008     if (isseqaij) {
8009       a = Gmat;
8010       b = NULL;
8011     } else {
8012       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8013       a             = d->A;
8014       b             = d->B;
8015     }
8016     if (filter >= 0 || scale) {
8017       /* take absolute value of each entry */
8018       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8019         MatInfo      info;
8020         PetscScalar *avals;
8021 
8022         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8023         PetscCall(MatSeqAIJGetArray(c, &avals));
8024         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8025         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8026       }
8027     }
8028   }
8029   if (symmetrize) {
8030     PetscBool isset, issym;
8031 
8032     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8033     if (!isset || !issym) {
8034       Mat matTrans;
8035 
8036       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8037       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8038       PetscCall(MatDestroy(&matTrans));
8039     }
8040     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8041   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8042   if (scale) {
8043     /* scale Gmat so that all diagonal values equal 1 or -1 */
8044     Vec diag;
8045 
8046     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8047     PetscCall(MatGetDiagonal(Gmat, diag));
8048     PetscCall(VecReciprocal(diag));
8049     PetscCall(VecSqrtAbs(diag));
8050     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8051     PetscCall(VecDestroy(&diag));
8052   }
8053   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8054   if (filter >= 0) {
8055     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8056     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8057   }
8058   *a_Gmat = Gmat;
8059   PetscFunctionReturn(PETSC_SUCCESS);
8060 }
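
/*
  Usage sketch (illustrative; the matrix A and the parameter values are placeholders): build a scalar graph from a
  blocked AIJ matrix

    Mat G;
    PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &G));
    //   symmetrize = PETSC_TRUE : add the transpose so the graph is symmetric
    //   scale      = PETSC_TRUE : symmetric diagonal scaling so diagonal entries become +/-1 (assumes a nonzero diagonal)
    //   filter     = 0.01       : zero/remove entries with magnitude below the threshold (negative disables filtering)
    //   index_size = 0, NULL    : use all rows/columns of each bs x bs block when collapsing blocks to scalars
    PetscCall(MatDestroy(&G));
*/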
8061 
8062 /*
8063     Special version for direct calls from Fortran
8064 */
8065 
8066 /* Change these macros so they can be used in a void function */
8067 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8068 #undef PetscCall
8069 #define PetscCall(...) \
8070   do { \
8071     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8072     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8073       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8074       return; \
8075     } \
8076   } while (0)
8077 
8078 #undef SETERRQ
8079 #define SETERRQ(comm, ierr, ...) \
8080   do { \
8081     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8082     return; \
8083   } while (0)
8084 
8085 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8086   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8087 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8088   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8089 #else
8090 #endif
8091 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8092 {
8093   Mat         mat = *mmat;
8094   PetscInt    m = *mm, n = *mn;
8095   InsertMode  addv = *maddv;
8096   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8097   PetscScalar value;
8098 
8099   MatCheckPreallocated(mat, 1);
8100   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8101   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8102   {
8103     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8104     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8105     PetscBool roworiented = aij->roworiented;
8106 
8107     /* Some Variables required in the macro */
8108     Mat         A     = aij->A;
8109     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8110     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8111     MatScalar  *aa;
8112     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8113     Mat         B                 = aij->B;
8114     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8115     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8116     MatScalar  *ba;
8117     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8118      * cannot use "#if defined" inside a macro. */
8119     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8120 
8121     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8122     PetscInt   nonew = a->nonew;
8123     MatScalar *ap1, *ap2;
8124 
8125     PetscFunctionBegin;
8126     PetscCall(MatSeqAIJGetArray(A, &aa));
8127     PetscCall(MatSeqAIJGetArray(B, &ba));
8128     for (i = 0; i < m; i++) {
8129       if (im[i] < 0) continue;
8130       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8131       if (im[i] >= rstart && im[i] < rend) {
8132         row      = im[i] - rstart;
8133         lastcol1 = -1;
8134         rp1      = aj + ai[row];
8135         ap1      = aa + ai[row];
8136         rmax1    = aimax[row];
8137         nrow1    = ailen[row];
8138         low1     = 0;
8139         high1    = nrow1;
8140         lastcol2 = -1;
8141         rp2      = bj + bi[row];
8142         ap2      = ba + bi[row];
8143         rmax2    = bimax[row];
8144         nrow2    = bilen[row];
8145         low2     = 0;
8146         high2    = nrow2;
8147 
8148         for (j = 0; j < n; j++) {
8149           if (roworiented) value = v[i * n + j];
8150           else value = v[i + j * m];
8151           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8152           if (in[j] >= cstart && in[j] < cend) {
8153             col = in[j] - cstart;
8154             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8155           } else if (in[j] < 0) continue;
8156           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8157             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8158           } else {
8159             if (mat->was_assembled) {
8160               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8161 #if defined(PETSC_USE_CTABLE)
8162               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8163               col--;
8164 #else
8165               col = aij->colmap[in[j]] - 1;
8166 #endif
8167               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8168                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8169                 col = in[j];
8170                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8171                 B        = aij->B;
8172                 b        = (Mat_SeqAIJ *)B->data;
8173                 bimax    = b->imax;
8174                 bi       = b->i;
8175                 bilen    = b->ilen;
8176                 bj       = b->j;
8177                 rp2      = bj + bi[row];
8178                 ap2      = ba + bi[row];
8179                 rmax2    = bimax[row];
8180                 nrow2    = bilen[row];
8181                 low2     = 0;
8182                 high2    = nrow2;
8183                 bm       = aij->B->rmap->n;
8184                 ba       = b->a;
8185                 inserted = PETSC_FALSE;
8186               }
8187             } else col = in[j];
8188             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8189           }
8190         }
8191       } else if (!aij->donotstash) {
8192         if (roworiented) {
8193           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8194         } else {
8195           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8196         }
8197       }
8198     }
8199     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8200     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8201   }
8202   PetscFunctionReturnVoid();
8203 }
8204 
8205 /* Undefining these here since they were redefined from their original definition above! No
8206  * other PETSc functions should be defined past this point, as it is impossible to recover the
8207  * original definitions */
8208 #undef PetscCall
8209 #undef SETERRQ
8210