xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision eb14dd146a89a5ef91eac0ce18ce74e06f74fd1f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also automatically switches over to use inodes when
128   enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
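
/*
  Editorial usage sketch (not part of the manual page above): creating an `MATAIJ` matrix and calling
  both preallocation routines, as the text recommends. The size and per-row nonzero estimates
  (100 rows, 5 diagonal, 2 off-diagonal) are arbitrary assumptions for illustration only.

    Mat      A;
    PetscInt n = 100;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    // both calls are safe: the one matching the actual type takes effect, the other is ignored
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/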
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
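
/*
  Minimal sketch of how the routine above is reached from user code: binding the parallel matrix to
  the CPU also binds its sequential blocks and the work vectors handled above.

    PetscCall(MatBindToCPU(A, PETSC_TRUE));
*/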
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
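
/*
  Sketch of a user-level call that relies on column reductions like the routine above (the norms
  array must have the global number of columns as its length; names below are illustrative):

    PetscReal *norms;
    PetscInt   N;

    PetscCall(MatGetSize(A, NULL, &N));
    PetscCall(PetscMalloc1(N, &norms));
    PetscCall(MatGetColumnNorms(A, NORM_2, norms));
    PetscCall(PetscFree(norms));
*/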
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
362 a slightly higher hash table cost; without it, it is not scalable (each process
363 has an order-N integer array) but is fast to access.
364 */
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
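
/*
  Sketch of how the colmap built above is queried elsewhere in this file (see MatSetValues_MPIAIJ()
  below): a global column index gcol (placeholder name) is translated to a local off-diagonal
  column, where -1 means "not present"; the 1-based shift lets 0 act as the missing value in both
  storage schemes.

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
    lcol--;
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif
*/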
381 
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some variables required in the macros */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping insertion of a new nonzero location in the off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
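
/*
  Minimal caller-side sketch for the routine above: entries are addressed with global indices, and
  rows owned by other ranks are stashed until assembly. The row/column/value choices are arbitrary.

    PetscInt    rstart, rend, row, col;
    PetscScalar v = 1.0;

    PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
    row = rstart; // locally owned row: inserted directly into aij->A or aij->B
    col = rstart;
    PetscCall(MatSetValues(mat, 1, &row, 1, &col, &v, ADD_VALUES));
    // off-process rows are legal too (unless MAT_NO_OFF_PROC_ENTRIES was set); they are collected
    // in mat->stash and communicated during MatAssemblyBegin()/MatAssemblyEnd()
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/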
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
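
/*
  Worked example for the two CSR-copy routines above (all numbers are hypothetical): suppose this
  rank owns columns [cstart, cend) = [4, 8) and has a single local row with

    mat_i = {0, 4},  mat_j = {1, 5, 6, 9},  mat_a = {a, b, c, d}

  Columns 5 and 6 fall inside [4, 8) and go to the diagonal block with shifted indices

    aj = {1, 2},  aa = {b, c},  ailen[0] = 2

  while columns 1 and 9 keep their global indices in the off-diagonal block

    bj = {1, 9},  ba = {a, d},  bilen[0] = 2
*/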
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
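
/*
  Sketch of a legal call to the routine above: only locally owned rows may be queried, while the
  columns may lie anywhere. The chosen indices are illustrative.

    PetscInt    row, cols[2], rstart, rend, N;
    PetscScalar vals[2];

    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
    row     = rstart;
    cols[0] = 0;
    cols[1] = N - 1;
    PetscCall(MatGetValues(mat, 1, &row, 2, cols, vals));
*/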
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any processor has disassembled; if so, we must
797      also disassemble ourselves, in order that we may reassemble. */
798   /*
799      if nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled thus we can skip this stuff
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
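
/*
  Caller-side sketch for the routine above (rows chosen arbitrarily): zero two rows, put 1.0 on
  their diagonal, and fix the right-hand side so that b[row] = diag * x[row] for those rows.

    PetscInt    rows[2], rstart;
    PetscScalar diag = 1.0;

    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    rows[0] = rstart;
    rows[1] = rstart + 1;
    PetscCall(MatZeroRows(A, 2, rows, diag, x, b)); // x and b may be NULL when no RHS fix is needed
*/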
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layouts don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
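
/*
  The multiply above implements the standard MPIAIJ split: with A_d the diagonal block, B_o the
  off-diagonal block, and x_g = a->lvec holding the scattered ghost values,

    y = A_d * x_local + B_o * x_g

  The scatter of x into lvec is overlapped with the local A_d * x product.
*/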
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices() call. */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /*  The commented code uses MatCreateSubMatrices instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Everyone has to call to draw the matrix since the graphics waits are
1357        synchronized across all processors that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
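  /*
     The local sweep variants below form a block-Jacobi outer iteration: each iteration scatters the
     current solution into the ghost vector lvec, forms bb1 = bb - B*lvec (moving the off-diagonal
     contribution to the right-hand side), and then runs the requested SOR sweep on the diagonal block A.
  */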
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
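  /* pack the local statistics of the diagonal block A, then add those of the off-diagonal block B,
     so a single reduction (MAX or SUM) can combine them across ranks when requested */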
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal block is square it inherits some of the properties above */
1689     break;
1690   case MAT_SUBMAT_SINGLEIS:
1691     A->submat_singleis = flg;
1692     break;
1693   case MAT_STRUCTURE_ONLY:
1694     /* The option is handled directly by MatSetOption() */
1695     break;
1696   default:
1697     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1698   }
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1703 {
1704   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1705   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1706   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1707   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1708   PetscInt    *cmap, *idx_p;
1709 
1710   PetscFunctionBegin;
1711   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1712   mat->getrowactive = PETSC_TRUE;
1713 
1714   if (!mat->rowvalues && (idx || v)) {
1715     /*
1716         allocate enough space to hold information from the longest row.
1717     */
1718     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1719     PetscInt    max = 1, tmp;
1720     for (i = 0; i < matin->rmap->n; i++) {
1721       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1722       if (max < tmp) max = tmp;
1723     }
1724     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1725   }
1726 
1727   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1728   lrow = row - rstart;
1729 
1730   pvA = &vworkA;
1731   pcA = &cworkA;
1732   pvB = &vworkB;
1733   pcB = &cworkB;
1734   if (!v) {
1735     pvA = NULL;
1736     pvB = NULL;
1737   }
1738   if (!idx) {
1739     pcA = NULL;
1740     if (!v) pcB = NULL;
1741   }
1742   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1743   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1744   nztot = nzA + nzB;
1745 
1746   cmap = mat->garray;
1747   if (v || idx) {
1748     if (nztot) {
1749       /* Sort by increasing column numbers, assuming A and B already sorted */
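      /* B entries with global column < cstart come first, then all A entries (local column + cstart),
         then the remaining B entries; imark counts the leading B entries */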
1750       PetscInt imark = -1;
1751       if (v) {
1752         *v = v_p = mat->rowvalues;
1753         for (i = 0; i < nzB; i++) {
1754           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1755           else break;
1756         }
1757         imark = i;
1758         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1759         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1760       }
1761       if (idx) {
1762         *idx = idx_p = mat->rowindices;
1763         if (imark > -1) {
1764           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1765         } else {
1766           for (i = 0; i < nzB; i++) {
1767             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1768             else break;
1769           }
1770           imark = i;
1771         }
1772         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1773         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1774       }
1775     } else {
1776       if (idx) *idx = NULL;
1777       if (v) *v = NULL;
1778     }
1779   }
1780   *nz = nztot;
1781   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1782   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1783   PetscFunctionReturn(PETSC_SUCCESS);
1784 }
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1797 {
1798   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1799   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1800   PetscInt         i, j, cstart = mat->cmap->rstart;
1801   PetscReal        sum = 0.0;
1802   const MatScalar *v, *amata, *bmata;
1803   PetscMPIInt      iN;
1804 
1805   PetscFunctionBegin;
1806   if (aij->size == 1) {
1807     PetscCall(MatNorm(aij->A, type, norm));
1808   } else {
1809     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1810     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1811     if (type == NORM_FROBENIUS) {
1812       v = amata;
1813       for (i = 0; i < amat->nz; i++) {
1814         sum += PetscRealPart(PetscConj(*v) * (*v));
1815         v++;
1816       }
1817       v = bmata;
1818       for (i = 0; i < bmat->nz; i++) {
1819         sum += PetscRealPart(PetscConj(*v) * (*v));
1820         v++;
1821       }
1822       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1823       *norm = PetscSqrtReal(*norm);
1824       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1825     } else if (type == NORM_1) { /* max column norm */
1826       PetscReal *tmp, *tmp2;
1827       PetscInt  *jj, *garray = aij->garray;
1828       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1829       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1830       *norm = 0.0;
1831       v     = amata;
1832       jj    = amat->j;
1833       for (j = 0; j < amat->nz; j++) {
1834         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1835         v++;
1836       }
1837       v  = bmata;
1838       jj = bmat->j;
1839       for (j = 0; j < bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1841         v++;
1842       }
1843       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1844       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1845       for (j = 0; j < mat->cmap->N; j++) {
1846         if (tmp2[j] > *norm) *norm = tmp2[j];
1847       }
1848       PetscCall(PetscFree(tmp));
1849       PetscCall(PetscFree(tmp2));
1850       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1851     } else if (type == NORM_INFINITY) { /* max row norm */
1852       PetscReal ntemp = 0.0;
1853       for (j = 0; j < aij->A->rmap->n; j++) {
1854         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1855         sum = 0.0;
1856         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1857           sum += PetscAbsScalar(*v);
1858           v++;
1859         }
1860         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1861         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1862           sum += PetscAbsScalar(*v);
1863           v++;
1864         }
1865         if (sum > ntemp) ntemp = sum;
1866       }
1867       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1868       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1869     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1870     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1877 {
1878   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1879   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1880   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1881   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1882   Mat              B, A_diag, *B_diag;
1883   const MatScalar *pbv, *bv;
1884 
1885   PetscFunctionBegin;
1886   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1887   ma = A->rmap->n;
1888   na = A->cmap->n;
1889   mb = a->B->rmap->n;
1890   nb = a->B->cmap->n;
1891   ai = Aloc->i;
1892   aj = Aloc->j;
1893   bi = Bloc->i;
1894   bj = Bloc->j;
1895   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1896     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1897     PetscSFNode         *oloc;
1898     PETSC_UNUSED PetscSF sf;
1899 
1900     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1901     /* compute d_nnz for preallocation */
1902     PetscCall(PetscArrayzero(d_nnz, na));
1903     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1904     /* compute local off-diagonal contributions */
1905     PetscCall(PetscArrayzero(g_nnz, nb));
1906     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1907     /* map those to global */
1908     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1909     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1910     PetscCall(PetscSFSetFromOptions(sf));
1911     PetscCall(PetscArrayzero(o_nnz, na));
1912     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1913     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFDestroy(&sf));
1915 
1916     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1917     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1918     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1919     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1920     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1921     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1922   } else {
1923     B = *matout;
1924     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1925   }
1926 
1927   b           = (Mat_MPIAIJ *)B->data;
1928   A_diag      = a->A;
1929   B_diag      = &b->A;
1930   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1931   A_diag_ncol = A_diag->cmap->N;
1932   B_diag_ilen = sub_B_diag->ilen;
1933   B_diag_i    = sub_B_diag->i;
1934 
1935   /* Set ilen for diagonal of B */
1936   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1937 
1938   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1939   very quickly (=without using MatSetValues), because all writes are local. */
1940   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1941   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1942 
1943   /* copy over the B part */
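  /* each local row i of B becomes part of column (rstart + i) of the transpose: its values are
     inserted as a single column whose row indices are the global column indices from garray */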
1944   PetscCall(PetscMalloc1(bi[mb], &cols));
1945   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1946   pbv = bv;
1947   row = A->rmap->rstart;
1948   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1949   cols_tmp = cols;
1950   for (i = 0; i < mb; i++) {
1951     ncol = bi[i + 1] - bi[i];
1952     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1953     row++;
1954     if (pbv) pbv += ncol;
1955     if (cols_tmp) cols_tmp += ncol;
1956   }
1957   PetscCall(PetscFree(cols));
1958   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1959 
1960   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1961   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1962   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1963     *matout = B;
1964   } else {
1965     PetscCall(MatHeaderMerge(A, &B));
1966   }
1967   PetscFunctionReturn(PETSC_SUCCESS);
1968 }
1969 
1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1971 {
1972   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1973   Mat         a = aij->A, b = aij->B;
1974   PetscInt    s1, s2, s3;
1975 
1976   PetscFunctionBegin;
1977   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1978   if (rr) {
1979     PetscCall(VecGetLocalSize(rr, &s1));
1980     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1981     /* Overlap communication with computation. */
1982     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1983   }
1984   if (ll) {
1985     PetscCall(VecGetLocalSize(ll, &s1));
1986     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1987     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1988   }
1989   /* scale the diagonal block */
1990   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1991 
1992   if (rr) {
1993     /* Do a scatter end and then right scale the off-diagonal block */
1994     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1995     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2001 {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003 
2004   PetscFunctionBegin;
2005   PetscCall(MatSetUnfactored(a->A));
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2010 {
2011   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2012   Mat         a, b, c, d;
2013   PetscBool   flg;
2014 
2015   PetscFunctionBegin;
2016   a = matA->A;
2017   b = matA->B;
2018   c = matB->A;
2019   d = matB->B;
2020 
2021   PetscCall(MatEqual(a, c, &flg));
2022   if (flg) PetscCall(MatEqual(b, d, &flg));
2023   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2028 {
2029   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2030   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2031 
2032   PetscFunctionBegin;
2033   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2034   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2035     /* because of the column compression in the off-processor part of the matrix a->B,
2036        the number of columns in a->B and b->B may be different, hence we cannot call
2037        the MatCopy() directly on the two parts. If need be, we can provide a more
2038        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2039        then copying the submatrices */
2040     PetscCall(MatCopy_Basic(A, B, str));
2041   } else {
2042     PetscCall(MatCopy(a->A, b->A, str));
2043     PetscCall(MatCopy(a->B, b->B, str));
2044   }
2045   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 /*
2050    Computes the number of nonzeros per row needed for preallocation when X and Y
2051    have different nonzero structure.
2052 */
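/*
   For example, if row i of X has global columns {1, 5} and row i of Y has global columns {3, 5, 7},
   the merged pattern is {1, 3, 5, 7} and nnz[i] = 4.
*/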
2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2054 {
2055   PetscInt i, j, k, nzx, nzy;
2056 
2057   PetscFunctionBegin;
2058   /* Set the number of nonzeros in the new matrix */
2059   for (i = 0; i < m; i++) {
2060     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2061     nzx    = xi[i + 1] - xi[i];
2062     nzy    = yi[i + 1] - yi[i];
2063     nnz[i] = 0;
2064     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2065       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2066       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2067       nnz[i]++;
2068     }
2069     for (; k < nzy; k++) nnz[i]++;
2070   }
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2076 {
2077   PetscInt    m = Y->rmap->N;
2078   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2079   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2080 
2081   PetscFunctionBegin;
2082   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2087 {
2088   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   if (str == SAME_NONZERO_PATTERN) {
2092     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2093     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2094   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2095     PetscCall(MatAXPY_Basic(Y, a, X, str));
2096   } else {
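    /* different nonzero patterns: preallocate a new matrix for the union of the two patterns,
       compute Y + a*X into it, then replace Y's internal data with the result via MatHeaderMerge() */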
2097     Mat       B;
2098     PetscInt *nnz_d, *nnz_o;
2099 
2100     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2101     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2102     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2103     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2104     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2105     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2106     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2107     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2108     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2109     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2110     PetscCall(MatHeaderMerge(Y, &B));
2111     PetscCall(PetscFree(nnz_d));
2112     PetscCall(PetscFree(nnz_o));
2113   }
2114   PetscFunctionReturn(PETSC_SUCCESS);
2115 }
2116 
2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2118 
2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2120 {
2121   PetscFunctionBegin;
2122   if (PetscDefined(USE_COMPLEX)) {
2123     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2124 
2125     PetscCall(MatConjugate_SeqAIJ(aij->A));
2126     PetscCall(MatConjugate_SeqAIJ(aij->B));
2127   }
2128   PetscFunctionReturn(PETSC_SUCCESS);
2129 }
2130 
2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2134 
2135   PetscFunctionBegin;
2136   PetscCall(MatRealPart(a->A));
2137   PetscCall(MatRealPart(a->B));
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatImaginaryPart(a->A));
2147   PetscCall(MatImaginaryPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2152 {
2153   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2154   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2155   PetscScalar       *va, *vv;
2156   Vec                vB, vA;
2157   const PetscScalar *vb;
2158 
2159   PetscFunctionBegin;
2160   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2161   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2162 
2163   PetscCall(VecGetArrayWrite(vA, &va));
2164   if (idx) {
2165     for (i = 0; i < m; i++) {
2166       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2167     }
2168   }
2169 
2170   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2171   PetscCall(PetscMalloc1(m, &idxb));
2172   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2173 
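  /* combine: for each row keep whichever of the diagonal-block (va) and off-diagonal-block (vb)
     entries has the larger magnitude; on a tie prefer the smaller global column index */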
2174   PetscCall(VecGetArrayWrite(v, &vv));
2175   PetscCall(VecGetArrayRead(vB, &vb));
2176   for (i = 0; i < m; i++) {
2177     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2178       vv[i] = vb[i];
2179       if (idx) idx[i] = a->garray[idxb[i]];
2180     } else {
2181       vv[i] = va[i];
2182       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2183     }
2184   }
2185   PetscCall(VecRestoreArrayWrite(v, &vv));
2186   PetscCall(VecRestoreArrayWrite(vA, &va));
2187   PetscCall(VecRestoreArrayRead(vB, &vb));
2188   PetscCall(PetscFree(idxb));
2189   PetscCall(VecDestroy(&vA));
2190   PetscCall(VecDestroy(&vB));
2191   PetscFunctionReturn(PETSC_SUCCESS);
2192 }
2193 
2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2195 {
2196   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2197   Vec         vB, vA;
2198 
2199   PetscFunctionBegin;
2200   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2201   PetscCall(MatGetRowSumAbs(a->A, vA));
2202   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2203   PetscCall(MatGetRowSumAbs(a->B, vB));
2204   PetscCall(VecAXPY(vA, 1.0, vB));
2205   PetscCall(VecDestroy(&vB));
2206   PetscCall(VecCopy(vA, v));
2207   PetscCall(VecDestroy(&vA));
2208   PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210 
2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2212 {
2213   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2214   PetscInt           m = A->rmap->n, n = A->cmap->n;
2215   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2216   PetscInt          *cmap = mat->garray;
2217   PetscInt          *diagIdx, *offdiagIdx;
2218   Vec                diagV, offdiagV;
2219   PetscScalar       *a, *diagA, *offdiagA;
2220   const PetscScalar *ba, *bav;
2221   PetscInt           r, j, col, ncols, *bi, *bj;
2222   Mat                B = mat->B;
2223   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2224 
2225   PetscFunctionBegin;
2226   /* When a process holds entire A and other processes have no entry */
2227   if (A->cmap->N == n) {
2228     PetscCall(VecGetArrayWrite(v, &diagA));
2229     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2230     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2231     PetscCall(VecDestroy(&diagV));
2232     PetscCall(VecRestoreArrayWrite(v, &diagA));
2233     PetscFunctionReturn(PETSC_SUCCESS);
2234   } else if (n == 0) {
2235     if (m) {
2236       PetscCall(VecGetArrayWrite(v, &a));
2237       for (r = 0; r < m; r++) {
2238         a[r] = 0.0;
2239         if (idx) idx[r] = -1;
2240       }
2241       PetscCall(VecRestoreArrayWrite(v, &a));
2242     }
2243     PetscFunctionReturn(PETSC_SUCCESS);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
2252   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2253   ba = bav;
2254   bi = b->i;
2255   bj = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r + 1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r]   = *ba;
2261       offdiagIdx[r] = cmap[0];
2262     } else { /* Brow is sparse, so an implicit zero already makes the row's minimum absolute value 0.0 */
2263       offdiagA[r] = 0.0;
2264 
2265       /* Find first hole in the cmap */
2266       for (j = 0; j < ncols; j++) {
2267         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2268         if (col > j && j < cstart) {
2269           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2270           break;
2271         } else if (col > j + n && j >= cstart) {
2272           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2273           break;
2274         }
2275       }
2276       if (j == ncols && ncols < A->cmap->N - n) {
2277         /* a hole is outside compressed Bcols */
2278         if (ncols == 0) {
2279           if (cstart) {
2280             offdiagIdx[r] = 0;
2281           } else offdiagIdx[r] = cend;
2282         } else { /* ncols > 0 */
2283           offdiagIdx[r] = cmap[ncols - 1] + 1;
2284           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2285         }
2286       }
2287     }
2288 
2289     for (j = 0; j < ncols; j++) {
2290       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2291         offdiagA[r]   = *ba;
2292         offdiagIdx[r] = cmap[*bj];
2293       }
2294       ba++;
2295       bj++;
2296     }
2297   }
2298 
2299   PetscCall(VecGetArrayWrite(v, &a));
2300   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2301   for (r = 0; r < m; ++r) {
2302     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2303       a[r] = diagA[r];
2304       if (idx) idx[r] = cstart + diagIdx[r];
2305     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2306       a[r] = diagA[r];
2307       if (idx) {
2308         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2309           idx[r] = cstart + diagIdx[r];
2310         } else idx[r] = offdiagIdx[r];
2311       }
2312     } else {
2313       a[r] = offdiagA[r];
2314       if (idx) idx[r] = offdiagIdx[r];
2315     }
2316   }
2317   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2318   PetscCall(VecRestoreArrayWrite(v, &a));
2319   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2320   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2321   PetscCall(VecDestroy(&diagV));
2322   PetscCall(VecDestroy(&offdiagV));
2323   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2330   PetscInt           m = A->rmap->n, n = A->cmap->n;
2331   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2332   PetscInt          *cmap = mat->garray;
2333   PetscInt          *diagIdx, *offdiagIdx;
2334   Vec                diagV, offdiagV;
2335   PetscScalar       *a, *diagA, *offdiagA;
2336   const PetscScalar *ba, *bav;
2337   PetscInt           r, j, col, ncols, *bi, *bj;
2338   Mat                B = mat->B;
2339   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2340 
2341   PetscFunctionBegin;
2342   /* When a process holds entire A and other processes have no entry */
2343   if (A->cmap->N == n) {
2344     PetscCall(VecGetArrayWrite(v, &diagA));
2345     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2346     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2347     PetscCall(VecDestroy(&diagV));
2348     PetscCall(VecRestoreArrayWrite(v, &diagA));
2349     PetscFunctionReturn(PETSC_SUCCESS);
2350   } else if (n == 0) {
2351     if (m) {
2352       PetscCall(VecGetArrayWrite(v, &a));
2353       for (r = 0; r < m; r++) {
2354         a[r] = PETSC_MAX_REAL;
2355         if (idx) idx[r] = -1;
2356       }
2357       PetscCall(VecRestoreArrayWrite(v, &a));
2358     }
2359     PetscFunctionReturn(PETSC_SUCCESS);
2360   }
2361 
2362   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2363   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2365   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2366 
2367   /* Get offdiagIdx[] for implicit 0.0 */
2368   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2369   ba = bav;
2370   bi = b->i;
2371   bj = b->j;
2372   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2373   for (r = 0; r < m; r++) {
2374     ncols = bi[r + 1] - bi[r];
2375     if (ncols == A->cmap->N - n) { /* Brow is dense */
2376       offdiagA[r]   = *ba;
2377       offdiagIdx[r] = cmap[0];
2378     } else { /* Brow is sparse, so an implicit zero already makes the row's minimum 0.0 or lower */
2379       offdiagA[r] = 0.0;
2380 
2381       /* Find first hole in the cmap */
2382       for (j = 0; j < ncols; j++) {
2383         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2384         if (col > j && j < cstart) {
2385           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2386           break;
2387         } else if (col > j + n && j >= cstart) {
2388           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2389           break;
2390         }
2391       }
2392       if (j == ncols && ncols < A->cmap->N - n) {
2393         /* a hole is outside compressed Bcols */
2394         if (ncols == 0) {
2395           if (cstart) {
2396             offdiagIdx[r] = 0;
2397           } else offdiagIdx[r] = cend;
2398         } else { /* ncols > 0 */
2399           offdiagIdx[r] = cmap[ncols - 1] + 1;
2400           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2401         }
2402       }
2403     }
2404 
2405     for (j = 0; j < ncols; j++) {
2406       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2407         offdiagA[r]   = *ba;
2408         offdiagIdx[r] = cmap[*bj];
2409       }
2410       ba++;
2411       bj++;
2412     }
2413   }
2414 
2415   PetscCall(VecGetArrayWrite(v, &a));
2416   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2417   for (r = 0; r < m; ++r) {
2418     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2419       a[r] = diagA[r];
2420       if (idx) idx[r] = cstart + diagIdx[r];
2421     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2422       a[r] = diagA[r];
2423       if (idx) {
2424         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2425           idx[r] = cstart + diagIdx[r];
2426         } else idx[r] = offdiagIdx[r];
2427       }
2428     } else {
2429       a[r] = offdiagA[r];
2430       if (idx) idx[r] = offdiagIdx[r];
2431     }
2432   }
2433   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2434   PetscCall(VecRestoreArrayWrite(v, &a));
2435   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2436   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2437   PetscCall(VecDestroy(&diagV));
2438   PetscCall(VecDestroy(&offdiagV));
2439   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2440   PetscFunctionReturn(PETSC_SUCCESS);
2441 }
2442 
2443 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2444 {
2445   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2446   PetscInt           m = A->rmap->n, n = A->cmap->n;
2447   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2448   PetscInt          *cmap = mat->garray;
2449   PetscInt          *diagIdx, *offdiagIdx;
2450   Vec                diagV, offdiagV;
2451   PetscScalar       *a, *diagA, *offdiagA;
2452   const PetscScalar *ba, *bav;
2453   PetscInt           r, j, col, ncols, *bi, *bj;
2454   Mat                B = mat->B;
2455   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2456 
2457   PetscFunctionBegin;
2458   /* When a process holds entire A and other processes have no entry */
2459   if (A->cmap->N == n) {
2460     PetscCall(VecGetArrayWrite(v, &diagA));
2461     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2462     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2463     PetscCall(VecDestroy(&diagV));
2464     PetscCall(VecRestoreArrayWrite(v, &diagA));
2465     PetscFunctionReturn(PETSC_SUCCESS);
2466   } else if (n == 0) {
2467     if (m) {
2468       PetscCall(VecGetArrayWrite(v, &a));
2469       for (r = 0; r < m; r++) {
2470         a[r] = PETSC_MIN_REAL;
2471         if (idx) idx[r] = -1;
2472       }
2473       PetscCall(VecRestoreArrayWrite(v, &a));
2474     }
2475     PetscFunctionReturn(PETSC_SUCCESS);
2476   }
2477 
2478   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2479   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2481   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2482 
2483   /* Get offdiagIdx[] for implicit 0.0 */
2484   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2485   ba = bav;
2486   bi = b->i;
2487   bj = b->j;
2488   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2489   for (r = 0; r < m; r++) {
2490     ncols = bi[r + 1] - bi[r];
2491     if (ncols == A->cmap->N - n) { /* Brow is dense */
2492       offdiagA[r]   = *ba;
2493       offdiagIdx[r] = cmap[0];
2494     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2495       offdiagA[r] = 0.0;
2496 
2497       /* Find first hole in the cmap */
2498       for (j = 0; j < ncols; j++) {
2499         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2500         if (col > j && j < cstart) {
2501           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2502           break;
2503         } else if (col > j + n && j >= cstart) {
2504           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2505           break;
2506         }
2507       }
2508       if (j == ncols && ncols < A->cmap->N - n) {
2509         /* a hole is outside compressed Bcols */
2510         if (ncols == 0) {
2511           if (cstart) {
2512             offdiagIdx[r] = 0;
2513           } else offdiagIdx[r] = cend;
2514         } else { /* ncols > 0 */
2515           offdiagIdx[r] = cmap[ncols - 1] + 1;
2516           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2517         }
2518       }
2519     }
2520 
2521     for (j = 0; j < ncols; j++) {
2522       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2523         offdiagA[r]   = *ba;
2524         offdiagIdx[r] = cmap[*bj];
2525       }
2526       ba++;
2527       bj++;
2528     }
2529   }
2530 
2531   PetscCall(VecGetArrayWrite(v, &a));
2532   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2533   for (r = 0; r < m; ++r) {
2534     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2535       a[r] = diagA[r];
2536       if (idx) idx[r] = cstart + diagIdx[r];
2537     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2538       a[r] = diagA[r];
2539       if (idx) {
2540         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2541           idx[r] = cstart + diagIdx[r];
2542         } else idx[r] = offdiagIdx[r];
2543       }
2544     } else {
2545       a[r] = offdiagA[r];
2546       if (idx) idx[r] = offdiagIdx[r];
2547     }
2548   }
2549   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2550   PetscCall(VecRestoreArrayWrite(v, &a));
2551   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2552   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2553   PetscCall(VecDestroy(&diagV));
2554   PetscCall(VecDestroy(&offdiagV));
2555   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2556   PetscFunctionReturn(PETSC_SUCCESS);
2557 }
2558 
2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2560 {
2561   Mat *dummy;
2562 
2563   PetscFunctionBegin;
2564   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2565   *newmat = *dummy;
2566   PetscCall(PetscFree(dummy));
2567   PetscFunctionReturn(PETSC_SUCCESS);
2568 }
2569 
2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2571 {
2572   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573 
2574   PetscFunctionBegin;
2575   PetscCall(MatInvertBlockDiagonal(a->A, values));
2576   A->factorerrortype = a->A->factorerrortype;
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2581 {
2582   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2583 
2584   PetscFunctionBegin;
2585   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2586   PetscCall(MatSetRandom(aij->A, rctx));
2587   if (x->assembled) {
2588     PetscCall(MatSetRandom(aij->B, rctx));
2589   } else {
2590     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2591   }
2592   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2593   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2598 {
2599   PetscFunctionBegin;
2600   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2601   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 /*@
2606   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2607 
2608   Not Collective
2609 
2610   Input Parameter:
2611 . A - the matrix
2612 
2613   Output Parameter:
2614 . nz - the number of nonzeros
2615 
2616   Level: advanced
2617 
2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2619 @*/
2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2621 {
2622   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2623   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2624   PetscBool   isaij;
2625 
2626   PetscFunctionBegin;
2627   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2628   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2629   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2630   PetscFunctionReturn(PETSC_SUCCESS);
2631 }
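
/*
   A minimal usage sketch for MatMPIAIJGetNumberNonzeros() (illustrative only; the assembled
   MATMPIAIJ matrix A is assumed to exist in the caller):

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));

   The returned count is per rank: the sum of the nonzeros stored in the local "diagonal"
   block and the local "off-diagonal" block, as computed from aaij->i[] and baij->i[] above.
*/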
2632 
2633 /*@
2634   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2635 
2636   Collective
2637 
2638   Input Parameters:
2639 + A  - the matrix
2640 - sc - `PETSC_TRUE` indicates the scalable algorithm should be used (the default is the non-scalable algorithm)
2641 
2642   Level: advanced
2643 
2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2645 @*/
2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2647 {
2648   PetscFunctionBegin;
2649   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
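
/*
   A minimal usage sketch (illustrative only; A is assumed to be a MATMPIAIJ matrix):

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

   The same switch is available at runtime through the option handled in
   MatSetFromOptions_MPIAIJ() below, -mat_increase_overlap_scalable.
*/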
2652 
2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2654 {
2655   PetscBool sc = PETSC_FALSE, flg;
2656 
2657   PetscFunctionBegin;
2658   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2659   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2660   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2661   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2662   PetscOptionsHeadEnd();
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
2665 
2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2667 {
2668   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2669   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2670 
2671   PetscFunctionBegin;
2672   if (!Y->preallocated) {
2673     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2674   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimated preallocation. */
2675     PetscInt nonew = aij->nonew;
2676     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2677     aij->nonew = nonew;
2678   }
2679   PetscCall(MatShift_Basic(Y, a));
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2684 {
2685   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2686 
2687   PetscFunctionBegin;
2688   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2689   PetscCall(MatMissingDiagonal(a->A, missing, d));
2690   if (d) {
2691     PetscInt rstart;
2692     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2693     *d += rstart;
2694   }
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2704   PetscFunctionReturn(PETSC_SUCCESS);
2705 }
2706 
2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2708 {
2709   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2710 
2711   PetscFunctionBegin;
2712   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2718                                        MatGetRow_MPIAIJ,
2719                                        MatRestoreRow_MPIAIJ,
2720                                        MatMult_MPIAIJ,
2721                                        /* 4*/ MatMultAdd_MPIAIJ,
2722                                        MatMultTranspose_MPIAIJ,
2723                                        MatMultTransposeAdd_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        /*10*/ NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        MatSOR_MPIAIJ,
2731                                        MatTranspose_MPIAIJ,
2732                                        /*15*/ MatGetInfo_MPIAIJ,
2733                                        MatEqual_MPIAIJ,
2734                                        MatGetDiagonal_MPIAIJ,
2735                                        MatDiagonalScale_MPIAIJ,
2736                                        MatNorm_MPIAIJ,
2737                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2738                                        MatAssemblyEnd_MPIAIJ,
2739                                        MatSetOption_MPIAIJ,
2740                                        MatZeroEntries_MPIAIJ,
2741                                        /*24*/ MatZeroRows_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        /*29*/ MatSetUp_MPI_Hash,
2747                                        NULL,
2748                                        NULL,
2749                                        MatGetDiagonalBlock_MPIAIJ,
2750                                        NULL,
2751                                        /*34*/ MatDuplicate_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        /*39*/ MatAXPY_MPIAIJ,
2757                                        MatCreateSubMatrices_MPIAIJ,
2758                                        MatIncreaseOverlap_MPIAIJ,
2759                                        MatGetValues_MPIAIJ,
2760                                        MatCopy_MPIAIJ,
2761                                        /*44*/ MatGetRowMax_MPIAIJ,
2762                                        MatScale_MPIAIJ,
2763                                        MatShift_MPIAIJ,
2764                                        MatDiagonalSet_MPIAIJ,
2765                                        MatZeroRowsColumns_MPIAIJ,
2766                                        /*49*/ MatSetRandom_MPIAIJ,
2767                                        MatGetRowIJ_MPIAIJ,
2768                                        MatRestoreRowIJ_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2772                                        NULL,
2773                                        MatSetUnfactored_MPIAIJ,
2774                                        MatPermute_MPIAIJ,
2775                                        NULL,
2776                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2777                                        MatDestroy_MPIAIJ,
2778                                        MatView_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        /*64*/ NULL,
2782                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2787                                        MatGetRowMinAbs_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*75*/ MatFDColoringApply_AIJ,
2793                                        MatSetFromOptions_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        MatFindZeroDiagonals_MPIAIJ,
2797                                        /*80*/ NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*83*/ MatLoad_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*89*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatBindToCPU_MPIAIJ,
2816                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2817                                        NULL,
2818                                        NULL,
2819                                        MatConjugate_MPIAIJ,
2820                                        NULL,
2821                                        /*104*/ MatSetValuesRow_MPIAIJ,
2822                                        MatRealPart_MPIAIJ,
2823                                        MatImaginaryPart_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        /*109*/ NULL,
2827                                        NULL,
2828                                        MatGetRowMin_MPIAIJ,
2829                                        NULL,
2830                                        MatMissingDiagonal_MPIAIJ,
2831                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2832                                        NULL,
2833                                        MatGetGhosts_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                        MatGetMultiProcBlock_MPIAIJ,
2841                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2842                                        MatGetColumnReductions_MPIAIJ,
2843                                        MatInvertBlockDiagonal_MPIAIJ,
2844                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2845                                        MatCreateSubMatricesMPI_MPIAIJ,
2846                                        /*129*/ NULL,
2847                                        NULL,
2848                                        NULL,
2849                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2850                                        NULL,
2851                                        /*134*/ NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2857                                        NULL,
2858                                        NULL,
2859                                        MatFDColoringSetUp_MPIXAIJ,
2860                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2861                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2862                                        /*145*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCreateGraph_Simple_AIJ,
2866                                        NULL,
2867                                        /*150*/ NULL,
2868                                        MatEliminateZeros_MPIAIJ,
2869                                        MatGetRowSumAbs_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                        NULL};
2873 
2874 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatStoreValues(aij->A));
2880   PetscCall(MatStoreValues(aij->B));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
2884 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2885 {
2886   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2887 
2888   PetscFunctionBegin;
2889   PetscCall(MatRetrieveValues(aij->A));
2890   PetscCall(MatRetrieveValues(aij->B));
2891   PetscFunctionReturn(PETSC_SUCCESS);
2892 }
2893 
2894 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2895 {
2896   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2897   PetscMPIInt size;
2898 
2899   PetscFunctionBegin;
2900   if (B->hash_active) {
2901     B->ops[0]      = b->cops;
2902     B->hash_active = PETSC_FALSE;
2903   }
2904   PetscCall(PetscLayoutSetUp(B->rmap));
2905   PetscCall(PetscLayoutSetUp(B->cmap));
2906 
2907 #if defined(PETSC_USE_CTABLE)
2908   PetscCall(PetscHMapIDestroy(&b->colmap));
2909 #else
2910   PetscCall(PetscFree(b->colmap));
2911 #endif
2912   PetscCall(PetscFree(b->garray));
2913   PetscCall(VecDestroy(&b->lvec));
2914   PetscCall(VecScatterDestroy(&b->Mvctx));
2915 
2916   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2917 
2918   MatSeqXAIJGetOptions_Private(b->B);
2919   PetscCall(MatDestroy(&b->B));
2920   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2921   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2922   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2923   PetscCall(MatSetType(b->B, MATSEQAIJ));
2924   MatSeqXAIJRestoreOptions_Private(b->B);
2925 
2926   MatSeqXAIJGetOptions_Private(b->A);
2927   PetscCall(MatDestroy(&b->A));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2929   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2930   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2931   PetscCall(MatSetType(b->A, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->A);
2933 
2934   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2935   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2936   B->preallocated  = PETSC_TRUE;
2937   B->was_assembled = PETSC_FALSE;
2938   B->assembled     = PETSC_FALSE;
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
2941 
2942 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2943 {
2944   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2945 
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2948   PetscCall(PetscLayoutSetUp(B->rmap));
2949   PetscCall(PetscLayoutSetUp(B->cmap));
2950   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2951   else {
2952 #if defined(PETSC_USE_CTABLE)
2953     PetscCall(PetscHMapIDestroy(&b->colmap));
2954 #else
2955     PetscCall(PetscFree(b->colmap));
2956 #endif
2957     PetscCall(PetscFree(b->garray));
2958     PetscCall(VecDestroy(&b->lvec));
2959   }
2960   PetscCall(VecScatterDestroy(&b->Mvctx));
2961 
2962   PetscCall(MatResetPreallocation(b->A));
2963   PetscCall(MatResetPreallocation(b->B));
2964   B->preallocated  = PETSC_TRUE;
2965   B->was_assembled = PETSC_FALSE;
2966   B->assembled     = PETSC_FALSE;
2967   PetscFunctionReturn(PETSC_SUCCESS);
2968 }
2969 
2970 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2971 {
2972   Mat         mat;
2973   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2974 
2975   PetscFunctionBegin;
2976   *newmat = NULL;
2977   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2978   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2979   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2980   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2981   a = (Mat_MPIAIJ *)mat->data;
2982 
2983   mat->factortype = matin->factortype;
2984   mat->assembled  = matin->assembled;
2985   mat->insertmode = NOT_SET_VALUES;
2986 
2987   a->size         = oldmat->size;
2988   a->rank         = oldmat->rank;
2989   a->donotstash   = oldmat->donotstash;
2990   a->roworiented  = oldmat->roworiented;
2991   a->rowindices   = NULL;
2992   a->rowvalues    = NULL;
2993   a->getrowactive = PETSC_FALSE;
2994 
2995   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2996   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2997   if (matin->hash_active) {
2998     PetscCall(MatSetUp(mat));
2999   } else {
3000     mat->preallocated = matin->preallocated;
3001     if (oldmat->colmap) {
3002 #if defined(PETSC_USE_CTABLE)
3003       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3004 #else
3005       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3006       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3007 #endif
3008     } else a->colmap = NULL;
3009     if (oldmat->garray) {
3010       PetscInt len;
3011       len = oldmat->B->cmap->n;
3012       PetscCall(PetscMalloc1(len + 1, &a->garray));
3013       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3014     } else a->garray = NULL;
3015 
3016     /* MatDuplicate may be called with a non-assembled matrix;
3017       in fact, MatDuplicate only requires the matrix to be preallocated.
3018       This may happen inside a DMCreateMatrix_Shell */
3019     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3020     if (oldmat->Mvctx) {
3021       a->Mvctx = oldmat->Mvctx;
3022       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3023     }
3024     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3025     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3026   }
3027   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3028   *newmat = mat;
3029   PetscFunctionReturn(PETSC_SUCCESS);
3030 }
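
/*
   A minimal usage sketch of the public entry point that reaches MatDuplicate_MPIAIJ()
   (illustrative only; A is assumed to be a preallocated or assembled MATMPIAIJ matrix):

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
     ...
     PetscCall(MatDestroy(&B));

   Use MAT_DO_NOT_COPY_VALUES to duplicate only the nonzero structure.
*/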
3031 
3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3033 {
3034   PetscBool isbinary, ishdf5;
3035 
3036   PetscFunctionBegin;
3037   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3038   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3039   /* force binary viewer to load .info file if it has not yet done so */
3040   PetscCall(PetscViewerSetUp(viewer));
3041   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3043   if (isbinary) {
3044     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3045   } else if (ishdf5) {
3046 #if defined(PETSC_HAVE_HDF5)
3047     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3048 #else
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3050 #endif
3051   } else {
3052     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3053   }
3054   PetscFunctionReturn(PETSC_SUCCESS);
3055 }
3056 
3057 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3058 {
3059   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3060   PetscInt    *rowidxs, *colidxs;
3061   PetscScalar *matvals;
3062 
3063   PetscFunctionBegin;
3064   PetscCall(PetscViewerSetUp(viewer));
3065 
3066   /* read in matrix header */
3067   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3068   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3069   M  = header[1];
3070   N  = header[2];
3071   nz = header[3];
3072   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3073   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3074   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3075 
3076   /* set block sizes from the viewer's .info file */
3077   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3078   /* set global sizes if not set already */
3079   if (mat->rmap->N < 0) mat->rmap->N = M;
3080   if (mat->cmap->N < 0) mat->cmap->N = N;
3081   PetscCall(PetscLayoutSetUp(mat->rmap));
3082   PetscCall(PetscLayoutSetUp(mat->cmap));
3083 
3084   /* check if the matrix sizes are correct */
3085   PetscCall(MatGetSize(mat, &rows, &cols));
3086   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3087 
3088   /* read in row lengths and build row indices */
3089   PetscCall(MatGetLocalSize(mat, &m, NULL));
3090   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3091   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3092   rowidxs[0] = 0;
3093   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3094   if (nz != PETSC_INT_MAX) {
3095     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3096     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3097   }
3098 
3099   /* read in column indices and matrix values */
3100   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3101   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3102   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3103   /* store matrix indices and values */
3104   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3105   PetscCall(PetscFree(rowidxs));
3106   PetscCall(PetscFree2(colidxs, matvals));
3107   PetscFunctionReturn(PETSC_SUCCESS);
3108 }
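
/*
   A minimal sketch of driving the binary loader above through the public API
   (illustrative only; "matrix.dat" is a hypothetical file previously written with
   MatView() on a binary viewer, and error handling is omitted):

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));

   As the reads above show, the file holds a header (MAT_FILE_CLASSID, M, N, nz), the
   per-row lengths, then the column indices and values of all nonzeros.
*/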
3109 
3110 /* Not scalable because of ISAllGather() unless getting all columns. */
3111 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3112 {
3113   IS          iscol_local;
3114   PetscBool   isstride;
3115   PetscMPIInt lisstride = 0, gisstride;
3116 
3117   PetscFunctionBegin;
3118   /* check if we are grabbing all columns */
3119   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3120 
3121   if (isstride) {
3122     PetscInt start, len, mstart, mlen;
3123     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3124     PetscCall(ISGetLocalSize(iscol, &len));
3125     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3126     if (mstart == start && mlen - mstart == len) lisstride = 1;
3127   }
3128 
3129   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3130   if (gisstride) {
3131     PetscInt N;
3132     PetscCall(MatGetSize(mat, NULL, &N));
3133     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3134     PetscCall(ISSetIdentity(iscol_local));
3135     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3136   } else {
3137     PetscInt cbs;
3138     PetscCall(ISGetBlockSize(iscol, &cbs));
3139     PetscCall(ISAllGather(iscol, &iscol_local));
3140     PetscCall(ISSetBlockSize(iscol_local, cbs));
3141   }
3142 
3143   *isseq = iscol_local;
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
3147 /*
3148  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and forming an iscol_local of global size
3149  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3150 
3151  Input Parameters:
3152 +   mat - matrix
3153 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3154            i.e., mat->rstart <= isrow[i] < mat->rend
3155 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3156            i.e., mat->cstart <= iscol[i] < mat->cend
3157 
3158  Output Parameters:
3159 +   isrow_d - sequential row index set for retrieving mat->A
3160 .   iscol_d - sequential column index set for retrieving mat->A
3161 .   iscol_o - sequential column index set for retrieving mat->B
3162 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3163  */
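/*
   A toy illustration of the decomposition computed below (values are hypothetical):
   with 8 global columns split as [0,4) on rank 0 and [4,8) on rank 1, iscol = {1,2} on
   rank 0 and {5,6} on rank 1, and rank 0's off-diagonal block B having ghost columns
   {5,7}, rank 0 obtains
     iscol_d = {1,2}   (local indices of its selected owned columns)
     iscol_o = {0}     (B-local index of the selected ghost column 5)
     garray  = {2}     (column 5 sits at position 2 of the concatenated iscol)
*/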
3164 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3165 {
3166   Vec             x, cmap;
3167   const PetscInt *is_idx;
3168   PetscScalar    *xarray, *cmaparray;
3169   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3170   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3171   Mat             B    = a->B;
3172   Vec             lvec = a->lvec, lcmap;
3173   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3174   MPI_Comm        comm;
3175   VecScatter      Mvctx = a->Mvctx;
3176 
3177   PetscFunctionBegin;
3178   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3179   PetscCall(ISGetLocalSize(iscol, &ncols));
3180 
3181   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3182   PetscCall(MatCreateVecs(mat, &x, NULL));
3183   PetscCall(VecSet(x, -1.0));
3184   PetscCall(VecDuplicate(x, &cmap));
3185   PetscCall(VecSet(cmap, -1.0));
3186 
3187   /* Get start indices */
3188   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3189   isstart -= ncols;
3190   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3191 
3192   PetscCall(ISGetIndices(iscol, &is_idx));
3193   PetscCall(VecGetArray(x, &xarray));
3194   PetscCall(VecGetArray(cmap, &cmaparray));
3195   PetscCall(PetscMalloc1(ncols, &idx));
3196   for (i = 0; i < ncols; i++) {
3197     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3198     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3199     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3200   }
3201   PetscCall(VecRestoreArray(x, &xarray));
3202   PetscCall(VecRestoreArray(cmap, &cmaparray));
3203   PetscCall(ISRestoreIndices(iscol, &is_idx));
3204 
3205   /* Get iscol_d */
3206   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3207   PetscCall(ISGetBlockSize(iscol, &i));
3208   PetscCall(ISSetBlockSize(*iscol_d, i));
3209 
3210   /* Get isrow_d */
3211   PetscCall(ISGetLocalSize(isrow, &m));
3212   rstart = mat->rmap->rstart;
3213   PetscCall(PetscMalloc1(m, &idx));
3214   PetscCall(ISGetIndices(isrow, &is_idx));
3215   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3216   PetscCall(ISRestoreIndices(isrow, &is_idx));
3217 
3218   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3219   PetscCall(ISGetBlockSize(isrow, &i));
3220   PetscCall(ISSetBlockSize(*isrow_d, i));
3221 
3222   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3223   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3224   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3225 
3226   PetscCall(VecDuplicate(lvec, &lcmap));
3227 
3228   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3229   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3230 
3231   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3232   /* off-process column indices */
3233   count = 0;
3234   PetscCall(PetscMalloc1(Bn, &idx));
3235   PetscCall(PetscMalloc1(Bn, &cmap1));
3236 
3237   PetscCall(VecGetArray(lvec, &xarray));
3238   PetscCall(VecGetArray(lcmap, &cmaparray));
3239   for (i = 0; i < Bn; i++) {
3240     if (PetscRealPart(xarray[i]) > -1.0) {
3241       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3242       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3243       count++;
3244     }
3245   }
3246   PetscCall(VecRestoreArray(lvec, &xarray));
3247   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3248 
3249   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3250   /* cannot ensure iscol_o has same blocksize as iscol! */
3251 
3252   PetscCall(PetscFree(idx));
3253   *garray = cmap1;
3254 
3255   PetscCall(VecDestroy(&x));
3256   PetscCall(VecDestroy(&cmap));
3257   PetscCall(VecDestroy(&lcmap));
3258   PetscFunctionReturn(PETSC_SUCCESS);
3259 }
3260 
3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3263 {
3264   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3265   Mat         M = NULL;
3266   MPI_Comm    comm;
3267   IS          iscol_d, isrow_d, iscol_o;
3268   Mat         Asub = NULL, Bsub = NULL;
3269   PetscInt    n;
3270 
3271   PetscFunctionBegin;
3272   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3273 
3274   if (call == MAT_REUSE_MATRIX) {
3275     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3276     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3277     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3278 
3279     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3280     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3281 
3282     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3283     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3284 
3285     /* Update diagonal and off-diagonal portions of submat */
3286     asub = (Mat_MPIAIJ *)(*submat)->data;
3287     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3288     PetscCall(ISGetLocalSize(iscol_o, &n));
3289     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3290     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3291     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3292 
3293   } else { /* call == MAT_INITIAL_MATRIX */
3294     const PetscInt *garray;
3295     PetscInt        BsubN;
3296 
3297     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3298     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3299 
3300     /* Create local submatrices Asub and Bsub */
3301     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3302     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3303 
3304     /* Create submatrix M */
3305     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3306 
3307     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3308     asub = (Mat_MPIAIJ *)M->data;
3309 
3310     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3311     n = asub->B->cmap->N;
3312     if (BsubN > n) {
3313       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3314       const PetscInt *idx;
3315       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3316       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3317 
3318       PetscCall(PetscMalloc1(n, &idx_new));
3319       j = 0;
3320       PetscCall(ISGetIndices(iscol_o, &idx));
3321       for (i = 0; i < n; i++) {
3322         if (j >= BsubN) break;
3323         while (subgarray[i] > garray[j]) j++;
3324 
3325         if (subgarray[i] == garray[j]) {
3326           idx_new[i] = idx[j++];
3327         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3328       }
3329       PetscCall(ISRestoreIndices(iscol_o, &idx));
3330 
3331       PetscCall(ISDestroy(&iscol_o));
3332       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3333 
3334     } else if (BsubN < n) {
3335       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3336     }
3337 
3338     PetscCall(PetscFree(garray));
3339     *submat = M;
3340 
3341     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3342     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3343     PetscCall(ISDestroy(&isrow_d));
3344 
3345     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3346     PetscCall(ISDestroy(&iscol_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3349     PetscCall(ISDestroy(&iscol_o));
3350   }
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3355 {
3356   IS        iscol_local = NULL, isrow_d;
3357   PetscInt  csize;
3358   PetscInt  n, i, j, start, end;
3359   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3360   MPI_Comm  comm;
3361 
3362   PetscFunctionBegin;
3363   /* If isrow has same processor distribution as mat,
3364      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3365   if (call == MAT_REUSE_MATRIX) {
3366     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3367     if (isrow_d) {
3368       sameRowDist  = PETSC_TRUE;
3369       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3370     } else {
3371       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3372       if (iscol_local) {
3373         sameRowDist  = PETSC_TRUE;
3374         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3375       }
3376     }
3377   } else {
3378     /* Check if isrow has same processor distribution as mat */
3379     sameDist[0] = PETSC_FALSE;
3380     PetscCall(ISGetLocalSize(isrow, &n));
3381     if (!n) {
3382       sameDist[0] = PETSC_TRUE;
3383     } else {
3384       PetscCall(ISGetMinMax(isrow, &i, &j));
3385       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3386       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3387     }
3388 
3389     /* Check if iscol has same processor distribution as mat */
3390     sameDist[1] = PETSC_FALSE;
3391     PetscCall(ISGetLocalSize(iscol, &n));
3392     if (!n) {
3393       sameDist[1] = PETSC_TRUE;
3394     } else {
3395       PetscCall(ISGetMinMax(iscol, &i, &j));
3396       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3397       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3398     }
3399 
3400     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3401     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3402     sameRowDist = tsameDist[0];
3403   }
3404 
3405   if (sameRowDist) {
3406     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3407       /* isrow and iscol have same processor distribution as mat */
3408       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3409       PetscFunctionReturn(PETSC_SUCCESS);
3410     } else { /* sameRowDist */
3411       /* isrow has same processor distribution as mat */
3412       if (call == MAT_INITIAL_MATRIX) {
3413         PetscBool sorted;
3414         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3415         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3416         PetscCall(ISGetSize(iscol, &i));
3417         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3418 
3419         PetscCall(ISSorted(iscol_local, &sorted));
3420         if (sorted) {
3421           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3422           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3423           PetscFunctionReturn(PETSC_SUCCESS);
3424         }
3425       } else { /* call == MAT_REUSE_MATRIX */
3426         IS iscol_sub;
3427         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3428         if (iscol_sub) {
3429           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3430           PetscFunctionReturn(PETSC_SUCCESS);
3431         }
3432       }
3433     }
3434   }
3435 
3436   /* General case: iscol -> iscol_local which has global size of iscol */
3437   if (call == MAT_REUSE_MATRIX) {
3438     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3439     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3440   } else {
3441     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3442   }
3443 
3444   PetscCall(ISGetLocalSize(iscol, &csize));
3445   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3446 
3447   if (call == MAT_INITIAL_MATRIX) {
3448     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3449     PetscCall(ISDestroy(&iscol_local));
3450   }
3451   PetscFunctionReturn(PETSC_SUCCESS);
3452 }
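
/*
   A minimal sketch of exercising the dispatch above through the public API (illustrative
   only; A is a hypothetical assembled MATMPIAIJ matrix and error handling is omitted):

     IS       isrow, iscol;
     Mat      sub;
     PetscInt rstart, rend, cstart, cend;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     ... change the values of A ...
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &sub));

   Because both index sets match A's row and column ownership on every rank, this request
   would take the MatCreateSubMatrix_MPIAIJ_SameRowColDist() branch above.
*/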
3453 
3454 /*@C
3455   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3456   and "off-diagonal" part of the matrix in CSR format.
3457 
3458   Collective
3459 
3460   Input Parameters:
3461 + comm   - MPI communicator
3462 . A      - "diagonal" portion of matrix
3463 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3464 - garray - global index of `B` columns
3465 
3466   Output Parameter:
3467 . mat - the matrix, with input `A` as its local diagonal matrix
3468 
3469   Level: advanced
3470 
3471   Notes:
3472   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3473 
3474   `A` becomes part of the output `mat`, and `B` is destroyed by this routine; the user cannot use `A` or `B` afterwards.
3475 
3476 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3477 @*/
3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3479 {
3480   Mat_MPIAIJ        *maij;
3481   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3482   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3483   const PetscScalar *oa;
3484   Mat                Bnew;
3485   PetscInt           m, n, N;
3486   MatType            mpi_mat_type;
3487 
3488   PetscFunctionBegin;
3489   PetscCall(MatCreate(comm, mat));
3490   PetscCall(MatGetSize(A, &m, &n));
3491   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3492   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3493   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3494   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3495 
3496   /* Get global columns of mat */
3497   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3498 
3499   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3500   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3501   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3502   PetscCall(MatSetType(*mat, mpi_mat_type));
3503 
3504   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3505   maij = (Mat_MPIAIJ *)(*mat)->data;
3506 
3507   (*mat)->preallocated = PETSC_TRUE;
3508 
3509   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3510   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3511 
3512   /* Set A as diagonal portion of *mat */
3513   maij->A = A;
3514 
3515   nz = oi[m];
3516   for (i = 0; i < nz; i++) {
3517     col   = oj[i];
3518     oj[i] = garray[col];
3519   }
3520 
3521   /* Set Bnew as off-diagonal portion of *mat */
3522   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3523   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3524   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3525   bnew        = (Mat_SeqAIJ *)Bnew->data;
3526   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3527   maij->B     = Bnew;
3528 
3529   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3530 
3531   b->free_a  = PETSC_FALSE;
3532   b->free_ij = PETSC_FALSE;
3533   PetscCall(MatDestroy(&B));
3534 
3535   bnew->free_a  = PETSC_TRUE;
3536   bnew->free_ij = PETSC_TRUE;
3537 
3538   /* condense columns of maij->B */
3539   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3540   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3541   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3542   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3543   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3544   PetscFunctionReturn(PETSC_SUCCESS);
3545 }
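
/*
   Illustrative sketch of the garray contract used above (numbers are hypothetical):
   if a rank owns global columns [4,8) and its "off-diagonal" matrix B has three
   compressed columns that correspond to global columns 1, 9 and 12, then
     garray[] = {1, 9, 12}
   and the loop above rewrites every stored column index j of B as garray[j] before B
   is rebuilt (as Bnew) into the off-diagonal part of the MPIAIJ matrix.
*/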
3546 
3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3548 
3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3550 {
3551   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3552   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3553   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3554   Mat             M, Msub, B = a->B;
3555   MatScalar      *aa;
3556   Mat_SeqAIJ     *aij;
3557   PetscInt       *garray = a->garray, *colsub, Ncols;
3558   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3559   IS              iscol_sub, iscmap;
3560   const PetscInt *is_idx, *cmap;
3561   PetscBool       allcolumns = PETSC_FALSE;
3562   MPI_Comm        comm;
3563 
3564   PetscFunctionBegin;
3565   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3566   if (call == MAT_REUSE_MATRIX) {
3567     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3568     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3569     PetscCall(ISGetLocalSize(iscol_sub, &count));
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3572     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3573 
3574     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3575     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3576 
3577     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3578 
3579   } else { /* call == MAT_INITIAL_MATRIX */
3580     PetscBool flg;
3581 
3582     PetscCall(ISGetLocalSize(iscol, &n));
3583     PetscCall(ISGetSize(iscol, &Ncols));
3584 
3585     /* (1) iscol -> nonscalable iscol_local */
3586     /* Check for special case: each processor gets entire matrix columns */
3587     PetscCall(ISIdentity(iscol_local, &flg));
3588     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3589     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3590     if (allcolumns) {
3591       iscol_sub = iscol_local;
3592       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3593       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3594 
3595     } else {
3596       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3597       PetscInt *idx, *cmap1, k;
3598       PetscCall(PetscMalloc1(Ncols, &idx));
3599       PetscCall(PetscMalloc1(Ncols, &cmap1));
3600       PetscCall(ISGetIndices(iscol_local, &is_idx));
3601       count = 0;
3602       k     = 0;
3603       for (i = 0; i < Ncols; i++) {
3604         j = is_idx[i];
3605         if (j >= cstart && j < cend) {
3606           /* diagonal part of mat */
3607           idx[count]     = j;
3608           cmap1[count++] = i; /* column index in submat */
3609         } else if (Bn) {
3610           /* off-diagonal part of mat */
3611           if (j == garray[k]) {
3612             idx[count]     = j;
3613             cmap1[count++] = i; /* column index in submat */
3614           } else if (j > garray[k]) {
3615             while (j > garray[k] && k < Bn - 1) k++;
3616             if (j == garray[k]) {
3617               idx[count]     = j;
3618               cmap1[count++] = i; /* column index in submat */
3619             }
3620           }
3621         }
3622       }
3623       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3624 
3625       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3626       PetscCall(ISGetBlockSize(iscol, &cbs));
3627       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3628 
3629       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3630     }
3631 
3632     /* (3) Create sequential Msub */
3633     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3634   }
3635 
3636   PetscCall(ISGetLocalSize(iscol_sub, &count));
3637   aij = (Mat_SeqAIJ *)Msub->data;
3638   ii  = aij->i;
3639   PetscCall(ISGetIndices(iscmap, &cmap));
3640 
3641   /*
3642       m - number of local rows
3643       Ncols - number of columns (same on all processors)
3644       rstart - first row in new global matrix generated
3645   */
3646   PetscCall(MatGetSize(Msub, &m, NULL));
3647 
3648   if (call == MAT_INITIAL_MATRIX) {
3649     /* (4) Create parallel newmat */
3650     PetscMPIInt rank, size;
3651     PetscInt    csize;
3652 
3653     PetscCallMPI(MPI_Comm_size(comm, &size));
3654     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3655 
3656     /*
3657         Determine the number of non-zeros in the diagonal and off-diagonal
3658         portions of the matrix in order to do correct preallocation
3659     */
3660 
3661     /* first get start and end of "diagonal" columns */
3662     PetscCall(ISGetLocalSize(iscol, &csize));
3663     if (csize == PETSC_DECIDE) {
3664       PetscCall(ISGetSize(isrow, &mglobal));
3665       if (mglobal == Ncols) { /* square matrix */
3666         nlocal = m;
3667       } else {
3668         nlocal = Ncols / size + ((Ncols % size) > rank);
3669       }
3670     } else {
3671       nlocal = csize;
3672     }
3673     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3674     rstart = rend - nlocal;
3675     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3676 
3677     /* next, compute all the lengths */
3678     jj = aij->j;
3679     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3680     olens = dlens + m;
3681     for (i = 0; i < m; i++) {
3682       jend = ii[i + 1] - ii[i];
3683       olen = 0;
3684       dlen = 0;
3685       for (j = 0; j < jend; j++) {
3686         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3687         else dlen++;
3688         jj++;
3689       }
3690       olens[i] = olen;
3691       dlens[i] = dlen;
3692     }
3693 
3694     PetscCall(ISGetBlockSize(isrow, &bs));
3695     PetscCall(ISGetBlockSize(iscol, &cbs));
3696 
3697     PetscCall(MatCreate(comm, &M));
3698     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3699     PetscCall(MatSetBlockSizes(M, bs, cbs));
3700     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3701     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3702     PetscCall(PetscFree(dlens));
3703 
3704   } else { /* call == MAT_REUSE_MATRIX */
3705     M = *newmat;
3706     PetscCall(MatGetLocalSize(M, &i, NULL));
3707     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3708     PetscCall(MatZeroEntries(M));
3709     /*
3710          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3711        rather than the slower MatSetValues().
3712     */
3713     M->was_assembled = PETSC_TRUE;
3714     M->assembled     = PETSC_FALSE;
3715   }
3716 
3717   /* (5) Set values of Msub to *newmat */
3718   PetscCall(PetscMalloc1(count, &colsub));
3719   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3720 
3721   jj = aij->j;
3722   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3723   for (i = 0; i < m; i++) {
3724     row = rstart + i;
3725     nz  = ii[i + 1] - ii[i];
3726     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3727     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3728     jj += nz;
3729     aa += nz;
3730   }
3731   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3732   PetscCall(ISRestoreIndices(iscmap, &cmap));
3733 
3734   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3735   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3736 
3737   PetscCall(PetscFree(colsub));
3738 
3739   /* save Msub, iscol_sub and iscmap used in processor for next request */
3740   if (call == MAT_INITIAL_MATRIX) {
3741     *newmat = M;
3742     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3743     PetscCall(MatDestroy(&Msub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3746     PetscCall(ISDestroy(&iscol_sub));
3747 
3748     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3749     PetscCall(ISDestroy(&iscmap));
3750 
3751     if (iscol_local) {
3752       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3753       PetscCall(ISDestroy(&iscol_local));
3754     }
3755   }
3756   PetscFunctionReturn(PETSC_SUCCESS);
3757 }
3758 
3759 /*
3760     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3761   process, and then the end result by concatenating the local matrices.
3762   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3763 
3764   This requires a sequential iscol with all indices.
3765 */
3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3767 {
3768   PetscMPIInt rank, size;
3769   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3770   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3771   Mat         M, Mreuse;
3772   MatScalar  *aa, *vwork;
3773   MPI_Comm    comm;
3774   Mat_SeqAIJ *aij;
3775   PetscBool   colflag, allcolumns = PETSC_FALSE;
3776 
3777   PetscFunctionBegin;
3778   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3779   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3780   PetscCallMPI(MPI_Comm_size(comm, &size));
3781 
3782   /* Check for special case: each processor gets entire matrix columns */
3783   PetscCall(ISIdentity(iscol, &colflag));
3784   PetscCall(ISGetLocalSize(iscol, &n));
3785   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3786   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3787 
3788   if (call == MAT_REUSE_MATRIX) {
3789     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3790     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3791     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3792   } else {
3793     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3794   }
3795 
3796   /*
3797       m - number of local rows
3798       n - number of columns (same on all processors)
3799       rstart - first row in new global matrix generated
3800   */
3801   PetscCall(MatGetSize(Mreuse, &m, &n));
3802   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3803   if (call == MAT_INITIAL_MATRIX) {
3804     aij = (Mat_SeqAIJ *)Mreuse->data;
3805     ii  = aij->i;
3806     jj  = aij->j;
3807 
3808     /*
3809         Determine the number of non-zeros in the diagonal and off-diagonal
3810         portions of the matrix in order to do correct preallocation
3811     */
3812 
3813     /* first get start and end of "diagonal" columns */
3814     if (csize == PETSC_DECIDE) {
3815       PetscCall(ISGetSize(isrow, &mglobal));
3816       if (mglobal == n) { /* square matrix */
3817         nlocal = m;
3818       } else {
3819         nlocal = n / size + ((n % size) > rank);
3820       }
3821     } else {
3822       nlocal = csize;
3823     }
3824     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3825     rstart = rend - nlocal;
3826     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3827 
3828     /* next, compute all the lengths */
3829     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3830     olens = dlens + m;
3831     for (i = 0; i < m; i++) {
3832       jend = ii[i + 1] - ii[i];
3833       olen = 0;
3834       dlen = 0;
3835       for (j = 0; j < jend; j++) {
3836         if (*jj < rstart || *jj >= rend) olen++;
3837         else dlen++;
3838         jj++;
3839       }
3840       olens[i] = olen;
3841       dlens[i] = dlen;
3842     }
3843     PetscCall(MatCreate(comm, &M));
3844     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3845     PetscCall(MatSetBlockSizes(M, bs, cbs));
3846     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3847     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3848     PetscCall(PetscFree(dlens));
3849   } else {
3850     PetscInt ml, nl;
3851 
3852     M = *newmat;
3853     PetscCall(MatGetLocalSize(M, &ml, &nl));
3854     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3855     PetscCall(MatZeroEntries(M));
3856     /*
3857          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3858        rather than the slower MatSetValues().
3859     */
3860     M->was_assembled = PETSC_TRUE;
3861     M->assembled     = PETSC_FALSE;
3862   }
3863   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3864   aij = (Mat_SeqAIJ *)Mreuse->data;
3865   ii  = aij->i;
3866   jj  = aij->j;
3867 
3868   /* trigger copy to CPU if needed */
3869   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3870   for (i = 0; i < m; i++) {
3871     row   = rstart + i;
3872     nz    = ii[i + 1] - ii[i];
3873     cwork = jj;
3874     jj    = PetscSafePointerPlusOffset(jj, nz);
3875     vwork = aa;
3876     aa    = PetscSafePointerPlusOffset(aa, nz);
3877     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3878   }
3879   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3880 
3881   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3882   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3883   *newmat = M;
3884 
3885   /* save submatrix used in processor for next request */
3886   if (call == MAT_INITIAL_MATRIX) {
3887     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3888     PetscCall(MatDestroy(&Mreuse));
3889   }
3890   PetscFunctionReturn(PETSC_SUCCESS);
3891 }
3892 
3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3894 {
3895   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3896   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3897   const PetscInt *JJ;
3898   PetscBool       nooffprocentries;
3899   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3900 
3901   PetscFunctionBegin;
3902   PetscCall(PetscLayoutSetUp(B->rmap));
3903   PetscCall(PetscLayoutSetUp(B->cmap));
3904   m       = B->rmap->n;
3905   cstart  = B->cmap->rstart;
3906   cend    = B->cmap->rend;
3907   rstart  = B->rmap->rstart;
3908   irstart = Ii[0];
3909 
3910   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3911 
3912   if (PetscDefined(USE_DEBUG)) {
3913     for (i = 0; i < m; i++) {
3914       nnz = Ii[i + 1] - Ii[i];
3915       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3916       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3917       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3918       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3919     }
3920   }
3921 
3922   for (i = 0; i < m; i++) {
3923     nnz     = Ii[i + 1] - Ii[i];
3924     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3925     nnz_max = PetscMax(nnz_max, nnz);
3926     d       = 0;
3927     for (j = 0; j < nnz; j++) {
3928       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3929     }
3930     d_nnz[i] = d;
3931     o_nnz[i] = nnz - d;
3932   }
3933   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3934   PetscCall(PetscFree2(d_nnz, o_nnz));
3935 
3936   for (i = 0; i < m; i++) {
3937     ii = i + rstart;
3938     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3939   }
3940   nooffprocentries    = B->nooffprocentries;
3941   B->nooffprocentries = PETSC_TRUE;
3942   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3943   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3944   B->nooffprocentries = nooffprocentries;
3945 
3946   /* count number of entries below block diagonal */
3947   PetscCall(PetscFree(Aij->ld));
3948   PetscCall(PetscCalloc1(m, &ld));
3949   Aij->ld = ld;
3950   for (i = 0; i < m; i++) {
3951     nnz = Ii[i + 1] - Ii[i];
3952     j   = 0;
3953     while (j < nnz && J[j] < cstart) j++;
3954     ld[i] = j;
3955     if (J) J += nnz;
3956   }
3957 
3958   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3959   PetscFunctionReturn(PETSC_SUCCESS);
3960 }
3961 
3962 /*@
3963   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3964   (the default parallel PETSc format).
3965 
3966   Collective
3967 
3968   Input Parameters:
3969 + B - the matrix
3970 . i - the indices into `j` for the start of each local row (indices start with zero)
3971 . j - the column indices for each local row (indices start with zero)
3972 - v - optional values in the matrix
3973 
3974   Level: developer
3975 
3976   Notes:
3977   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3978   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3979   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3980 
3981   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3982 
3983   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3984 
3985   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3986 
3987   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3988   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3989 
3990   The format used for the sparse matrix input is equivalent to a
3991   row-major ordering, i.e., for the following matrix, the input data expected is
3992   as shown
3993 .vb
3994         1 0 0
3995         2 0 3     P0
3996        -------
3997         4 5 6     P1
3998 
3999      Process0 [P0] rows_owned=[0,1]
4000         i =  {0,1,3}  [size = nrow+1  = 2+1]
4001         j =  {0,0,2}  [size = 3]
4002         v =  {1,2,3}  [size = 3]
4003 
4004      Process1 [P1] rows_owned=[2]
4005         i =  {0,3}    [size = nrow+1  = 1+1]
4006         j =  {0,1,2}  [size = 3]
4007         v =  {4,5,6}  [size = 3]
4008 .ve
4009 
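  As a minimal usage sketch (variable names here are illustrative), process P0 in the example
  above might preallocate and fill its two local rows with
.vb
  Mat         B;
  PetscInt    i[] = {0, 1, 3};        /* row offsets for the 2 local rows */
  PetscInt    j[] = {0, 0, 2};        /* global column indices            */
  PetscScalar v[] = {1.0, 2.0, 3.0};  /* values, stored row by row        */

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2, PETSC_DECIDE, 3, 3);   /* P0 owns 2 of the 3 global rows */
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(B, i, j, v);
.ve
  while P1 would pass its own i = {0,3}, j = {0,1,2}, v = {4,5,6} with 1 local row.
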
4010 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4011           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4012 @*/
4013 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4014 {
4015   PetscFunctionBegin;
4016   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4017   PetscFunctionReturn(PETSC_SUCCESS);
4018 }
4019 
4020 /*@
4021   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4022   (the default parallel PETSc format).  For good matrix assembly performance
4023   the user should preallocate the matrix storage by setting the parameters
4024   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4025 
4026   Collective
4027 
4028   Input Parameters:
4029 + B     - the matrix
4030 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4031            (same value is used for all local rows)
4032 . d_nnz - array containing the number of nonzeros in the various rows of the
4033            DIAGONAL portion of the local submatrix (possibly different for each row)
4034            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4035            The size of this array is equal to the number of local rows, i.e 'm'.
4036            For matrices that will be factored, you must leave room for (and set)
4037            the diagonal entry even if it is zero.
4038 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4039            submatrix (same value is used for all local rows).
4040 - o_nnz - array containing the number of nonzeros in the various rows of the
4041            OFF-DIAGONAL portion of the local submatrix (possibly different for
4042            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4043            structure. The size of this array is equal to the number
4044            of local rows, i.e 'm'.
4045 
4046   Example Usage:
4047   Consider the following 8x8 matrix with 34 non-zero values, that is
4048   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4049   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4050   as follows
4051 
4052 .vb
4053             1  2  0  |  0  3  0  |  0  4
4054     Proc0   0  5  6  |  7  0  0  |  8  0
4055             9  0 10  | 11  0  0  | 12  0
4056     -------------------------------------
4057            13  0 14  | 15 16 17  |  0  0
4058     Proc1   0 18  0  | 19 20 21  |  0  0
4059             0  0  0  | 22 23  0  | 24  0
4060     -------------------------------------
4061     Proc2  25 26 27  |  0  0 28  | 29  0
4062            30  0  0  | 31 32 33  |  0 34
4063 .ve
4064 
4065   This can be represented as a collection of submatrices as
4066 .vb
4067       A B C
4068       D E F
4069       G H I
4070 .ve
4071 
4072   Where the submatrices A,B,C are owned by proc0, D,E,F are
4073   owned by proc1, G,H,I are owned by proc2.
4074 
4075   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4076   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4077   The 'M','N' parameters are 8,8, and have the same values on all procs.
4078 
4079   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4080   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4081   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4082   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4083   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4084   matrix, and [DF] as another `MATSEQAIJ` matrix.
4085 
4086   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4087   allocated for every row of the local diagonal submatrix, and `o_nz`
4088   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4089   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local
4090   row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4091   In this case, the values of `d_nz`, `o_nz` are
4092 .vb
4093      proc0  dnz = 2, o_nz = 2
4094      proc1  dnz = 3, o_nz = 2
4095      proc2  dnz = 1, o_nz = 4
4096 .ve
4097   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4098   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4099   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4100   34 values.
4101 
4102   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4103   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4104   In the above case the values for `d_nnz`, `o_nnz` are
4105 .vb
4106      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4107      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4108      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4109 .ve
4110   Here the space allocated is the sum of all the above values, i.e., 34, and
4111   hence the preallocation is perfect.
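
  For instance, a minimal sketch of the call proc1 above would make (assuming `A` has already
  been created as a `MATMPIAIJ` with 3 local rows and 3 local columns; the array names are
  illustrative) is
.vb
  PetscInt d_nnz[] = {3, 3, 2};
  PetscInt o_nnz[] = {2, 1, 1};

  MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);   /* d_nz/o_nz are ignored when the arrays are given */
.ve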
4112 
4113   Level: intermediate
4114 
4115   Notes:
4116   If the *_nnz parameter is given then the *_nz parameter is ignored
4117 
4118   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4119   storage.  The stored row and column indices begin with zero.
4120   See [Sparse Matrices](sec_matsparse) for details.
4121 
4122   The parallel matrix is partitioned such that the first m0 rows belong to
4123   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4124   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4125 
4126   The DIAGONAL portion of the local submatrix of a processor can be defined
4127   as the submatrix which is obtained by extracting the part corresponding to
4128   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4129   first row that belongs to the processor, r2 is the last row belonging to
4130   this processor, and c1-c2 is the range of indices of the local part of a
4131   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4132   common case of a square matrix, the row and column ranges are the same and
4133   the DIAGONAL part is also square. The remaining portion of the local
4134   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4135 
4136   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4137 
4138   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4139   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4140   You can also run with the option `-info` and look for messages with the string
4141   malloc in them to see if additional memory allocation was needed.
4142 
4143 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4144           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4145 @*/
4146 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4147 {
4148   PetscFunctionBegin;
4149   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4150   PetscValidType(B, 1);
4151   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4152   PetscFunctionReturn(PETSC_SUCCESS);
4153 }
4154 
4155 /*@
4156   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4157   rows in standard CSR format.
4158 
4159   Collective
4160 
4161   Input Parameters:
4162 + comm - MPI communicator
4163 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4164 . n    - This value should be the same as the local size used in creating the
4165          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
4166          calculated if `N` is given) For square matrices n is almost always `m`.
4167 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4168 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4169 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4170 . j    - global column indices
4171 - a    - optional matrix values
4172 
4173   Output Parameter:
4174 . mat - the matrix
4175 
4176   Level: intermediate
4177 
4178   Notes:
4179   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4180   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4181   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4182 
4183   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4184 
4185   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4186 
4187   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4188   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4189 
4190   The format which is used for the sparse matrix input, is equivalent to a
4191   row-major ordering, i.e., for the following matrix, the input data expected is
4192   as shown
4193 .vb
4194         1 0 0
4195         2 0 3     P0
4196        -------
4197         4 5 6     P1
4198 
4199      Process0 [P0] rows_owned=[0,1]
4200         i =  {0,1,3}  [size = nrow+1  = 2+1]
4201         j =  {0,0,2}  [size = 3]
4202         v =  {1,2,3}  [size = 3]
4203 
4204      Process1 [P1] rows_owned=[2]
4205         i =  {0,3}    [size = nrow+1  = 1+1]
4206         j =  {0,1,2}  [size = 3]
4207         v =  {4,5,6}  [size = 3]
4208 .ve
4209 
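  A minimal sketch of the call process P0 above would make (array names are illustrative;
  P1 passes its own i, j, v and m = 1):
.vb
  Mat         A;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1.0, 2.0, 3.0};

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, v, &A);
.ve
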
4210 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4211           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4212 @*/
4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4214 {
4215   PetscFunctionBegin;
4216   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4217   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4218   PetscCall(MatCreate(comm, mat));
4219   PetscCall(MatSetSizes(*mat, m, n, M, N));
4220   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4221   PetscCall(MatSetType(*mat, MATMPIAIJ));
4222   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4223   PetscFunctionReturn(PETSC_SUCCESS);
4224 }
4225 
4226 /*@
4227   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4228   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4229   to `MatCreateMPIAIJWithArrays()`
4230 
4231   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4232 
4233   Collective
4234 
4235   Input Parameters:
4236 + mat - the matrix
4237 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4238 . n   - This value should be the same as the local size used in creating the
4239        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4240        calculated if N is given) For square matrices n is almost always m.
4241 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4242 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4243 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4244 . J   - column indices
4245 - v   - matrix values
4246 
4247   Level: deprecated
4248 
4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4250           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4251 @*/
4252 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4253 {
4254   PetscInt        nnz, i;
4255   PetscBool       nooffprocentries;
4256   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4257   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4258   PetscScalar    *ad, *ao;
4259   PetscInt        ldi, Iii, md;
4260   const PetscInt *Adi = Ad->i;
4261   PetscInt       *ld  = Aij->ld;
4262 
4263   PetscFunctionBegin;
4264   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4265   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4266   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4267   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4268 
4269   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4270   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4271 
4272   for (i = 0; i < m; i++) {
4273     if (PetscDefined(USE_DEBUG)) {
4274       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4275         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4276         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4277       }
4278     }
4279     nnz = Ii[i + 1] - Ii[i];
4280     Iii = Ii[i];
4281     ldi = ld[i];
4282     md  = Adi[i + 1] - Adi[i];
4283     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4284     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4285     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4286     ad += md;
4287     ao += nnz - md;
4288   }
4289   nooffprocentries      = mat->nooffprocentries;
4290   mat->nooffprocentries = PETSC_TRUE;
4291   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4292   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4293   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4294   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4296   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4297   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4298   mat->nooffprocentries = nooffprocentries;
4299   PetscFunctionReturn(PETSC_SUCCESS);
4300 }
4301 
4302 /*@
4303   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4304 
4305   Collective
4306 
4307   Input Parameters:
4308 + mat - the matrix
4309 - v   - matrix values, stored by row
4310 
4311   Level: intermediate
4312 
4313   Notes:
4314   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4315 
4316   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4317 
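  A minimal sketch (the array name is illustrative), assuming `mat` was created by
  `MatCreateMPIAIJWithArrays()` with sorted column indices and has three local nonzeros:
.vb
  PetscScalar vnew[] = {10.0, 20.0, 30.0};   /* same nonzero pattern, new values, stored by row */

  MatUpdateMPIAIJWithArray(mat, vnew);
.ve
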
4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4319           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4320 @*/
4321 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4322 {
4323   PetscInt        nnz, i, m;
4324   PetscBool       nooffprocentries;
4325   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4326   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4327   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4328   PetscScalar    *ad, *ao;
4329   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4330   PetscInt        ldi, Iii, md;
4331   PetscInt       *ld = Aij->ld;
4332 
4333   PetscFunctionBegin;
4334   m = mat->rmap->n;
4335 
4336   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4337   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4338   Iii = 0;
4339   for (i = 0; i < m; i++) {
4340     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4341     ldi = ld[i];
4342     md  = Adi[i + 1] - Adi[i];
4343     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4344     ad += md;
4345     if (ao) {
4346       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4347       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4348       ao += nnz - md;
4349     }
4350     Iii += nnz;
4351   }
4352   nooffprocentries      = mat->nooffprocentries;
4353   mat->nooffprocentries = PETSC_TRUE;
4354   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4355   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4356   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4357   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4358   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4359   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4360   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4361   mat->nooffprocentries = nooffprocentries;
4362   PetscFunctionReturn(PETSC_SUCCESS);
4363 }
4364 
4365 /*@
4366   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4367   (the default parallel PETSc format).  For good matrix assembly performance
4368   the user should preallocate the matrix storage by setting the parameters
4369   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4370 
4371   Collective
4372 
4373   Input Parameters:
4374 + comm  - MPI communicator
4375 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4376           This value should be the same as the local size used in creating the
4377           y vector for the matrix-vector product y = Ax.
4378 . n     - This value should be the same as the local size used in creating the
4379           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4380           calculated if N is given) For square matrices n is almost always m.
4381 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4382 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4383 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4384           (same value is used for all local rows)
4385 . d_nnz - array containing the number of nonzeros in the various rows of the
4386           DIAGONAL portion of the local submatrix (possibly different for each row)
4387           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4388           The size of this array is equal to the number of local rows, i.e 'm'.
4389 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4390           submatrix (same value is used for all local rows).
4391 - o_nnz - array containing the number of nonzeros in the various rows of the
4392           OFF-DIAGONAL portion of the local submatrix (possibly different for
4393           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4394           structure. The size of this array is equal to the number
4395           of local rows, i.e 'm'.
4396 
4397   Output Parameter:
4398 . A - the matrix
4399 
4400   Options Database Keys:
4401 + -mat_no_inode                     - Do not use inodes
4402 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4403 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4404                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4405                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4406 
4407   Level: intermediate
4408 
4409   Notes:
4410   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4411   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4412   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4413 
4414   If the *_nnz parameter is given then the *_nz parameter is ignored
4415 
4416   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4417   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4418   storage requirements for this matrix.
4419 
4420   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4421   processor then it must be used on all processors that share the object for
4422   that argument.
4423 
4424   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4425   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4426 
4427   The user MUST specify either the local or global matrix dimensions
4428   (possibly both).
4429 
4430   The parallel matrix is partitioned across processors such that the
4431   first `m0` rows belong to process 0, the next `m1` rows belong to
4432   process 1, the next `m2` rows belong to process 2, etc., where
4433   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4434   values corresponding to an [m x N] submatrix.
4435 
4436   The columns are logically partitioned with the n0 columns belonging
4437   to the 0th partition, the next n1 columns belonging to the next
4438   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4439 
4440   The DIAGONAL portion of the local submatrix on any given processor
4441   is the submatrix corresponding to the rows and columns m,n
4442   corresponding to the given processor, i.e., the diagonal matrix on
4443   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4444   etc. The remaining portion of the local submatrix [m x (N-n)]
4445   constitutes the OFF-DIAGONAL portion. The example below better
4446   illustrates this concept.
4447 
4448   For a square global matrix we define each processor's diagonal portion
4449   to be its local rows and the corresponding columns (a square submatrix);
4450   each processor's off-diagonal portion encompasses the remainder of the
4451   local matrix (a rectangular submatrix).
4452 
4453   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4454 
4455   When calling this routine with a single process communicator, a matrix of
4456   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4457   type of communicator, use the construction mechanism
4458 .vb
4459   MatCreate(..., &A);
4460   MatSetType(A, MATMPIAIJ);
4461   MatSetSizes(A, m, n, M, N);
4462   MatMPIAIJSetPreallocation(A, ...);
4463 .ve
4464 
4465   By default, this format uses inodes (identical nodes) when possible.
4466   We search for consecutive rows with the same nonzero structure, thereby
4467   reusing matrix information to achieve increased efficiency.
4468 
4469   Example Usage:
4470   Consider the following 8x8 matrix with 34 non-zero values, that is
4471   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4472   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4473   as follows
4474 
4475 .vb
4476             1  2  0  |  0  3  0  |  0  4
4477     Proc0   0  5  6  |  7  0  0  |  8  0
4478             9  0 10  | 11  0  0  | 12  0
4479     -------------------------------------
4480            13  0 14  | 15 16 17  |  0  0
4481     Proc1   0 18  0  | 19 20 21  |  0  0
4482             0  0  0  | 22 23  0  | 24  0
4483     -------------------------------------
4484     Proc2  25 26 27  |  0  0 28  | 29  0
4485            30  0  0  | 31 32 33  |  0 34
4486 .ve
4487 
4488   This can be represented as a collection of submatrices as
4489 
4490 .vb
4491       A B C
4492       D E F
4493       G H I
4494 .ve
4495 
4496   Where the submatrices A,B,C are owned by proc0, D,E,F are
4497   owned by proc1, G,H,I are owned by proc2.
4498 
4499   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4500   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4501   The 'M','N' parameters are 8,8, and have the same values on all procs.
4502 
4503   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4504   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4505   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4506   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4507   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4508   matrix, and [DF] as another `MATSEQAIJ` matrix.
4509 
4510   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4511   allocated for every row of the local diagonal submatrix, and `o_nz`
4512   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4513   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local
4514   row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4515   In this case, the values of `d_nz`,`o_nz` are
4516 .vb
4517      proc0  dnz = 2, o_nz = 2
4518      proc1  dnz = 3, o_nz = 2
4519      proc2  dnz = 1, o_nz = 4
4520 .ve
4521   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4522   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4523   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4524   34 values.
4525 
4526   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4527   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4528   In the above case the values for d_nnz,o_nnz are
4529 .vb
4530      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4531      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4532      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4533 .ve
4534   Here the space allocated is the sum of all the above values, i.e., 34, and
4535   hence the preallocation is perfect.
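
  For instance, a minimal sketch of the single call proc1 above would make with per-row
  preallocation (array names are illustrative) is
.vb
  Mat      A;
  PetscInt d_nnz[] = {3, 3, 2};
  PetscInt o_nnz[] = {2, 1, 1};

  MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve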
4536 
4537 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4538           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4539           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4540 @*/
4541 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4542 {
4543   PetscMPIInt size;
4544 
4545   PetscFunctionBegin;
4546   PetscCall(MatCreate(comm, A));
4547   PetscCall(MatSetSizes(*A, m, n, M, N));
4548   PetscCallMPI(MPI_Comm_size(comm, &size));
4549   if (size > 1) {
4550     PetscCall(MatSetType(*A, MATMPIAIJ));
4551     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4552   } else {
4553     PetscCall(MatSetType(*A, MATSEQAIJ));
4554     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4555   }
4556   PetscFunctionReturn(PETSC_SUCCESS);
4557 }
4558 
4559 /*MC
4560     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4561 
4562     Synopsis:
4563     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4564 
4565     Not Collective
4566 
4567     Input Parameter:
4568 .   A - the `MATMPIAIJ` matrix
4569 
4570     Output Parameters:
4571 +   Ad - the diagonal portion of the matrix
4572 .   Ao - the off-diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578     Note:
4579     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4580 
4581 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4582 M*/
4583 
4584 /*MC
4585     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4586 
4587     Synopsis:
4588     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4589 
4590     Not Collective
4591 
4592     Input Parameters:
4593 +   A - the `MATMPIAIJ` matrix
4594 .   Ad - the diagonal portion of the matrix
4595 .   Ao - the off-diagonal portion of the matrix
4596 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4597 -   ierr - error code
4598 
4599      Level: advanced
4600 
4601 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4602 M*/
4603 
4604 /*@C
4605   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4606 
4607   Not Collective
4608 
4609   Input Parameter:
4610 . A - The `MATMPIAIJ` matrix
4611 
4612   Output Parameters:
4613 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4614 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4615 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4616 
4617   Level: intermediate
4618 
4619   Note:
4620   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4621   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4622   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4623   local column numbers to global column numbers in the original matrix.
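
  A minimal sketch (assuming `A` is an assembled `MATMPIAIJ` matrix):
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  /* local column k of Ao corresponds to global column colmap[k] of A */
.ve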
4624 
4625   Fortran Notes:
4626   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4627 
4628 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4629 @*/
4630 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4631 {
4632   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4633   PetscBool   flg;
4634 
4635   PetscFunctionBegin;
4636   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4637   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4638   if (Ad) *Ad = a->A;
4639   if (Ao) *Ao = a->B;
4640   if (colmap) *colmap = a->garray;
4641   PetscFunctionReturn(PETSC_SUCCESS);
4642 }
4643 
4644 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4645 {
4646   PetscInt     m, N, i, rstart, nnz, Ii;
4647   PetscInt    *indx;
4648   PetscScalar *values;
4649   MatType      rootType;
4650 
4651   PetscFunctionBegin;
4652   PetscCall(MatGetSize(inmat, &m, &N));
4653   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4654     PetscInt *dnz, *onz, sum, bs, cbs;
4655 
4656     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4657     /* Check sum(n) = N */
4658     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4659     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4660 
4661     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4662     rstart -= m;
4663 
4664     MatPreallocateBegin(comm, m, n, dnz, onz);
4665     for (i = 0; i < m; i++) {
4666       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4667       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4668       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4669     }
4670 
4671     PetscCall(MatCreate(comm, outmat));
4672     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4673     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4674     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4675     PetscCall(MatGetRootType_Private(inmat, &rootType));
4676     PetscCall(MatSetType(*outmat, rootType));
4677     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4678     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4679     MatPreallocateEnd(dnz, onz);
4680     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4681   }
4682 
4683   /* numeric phase */
4684   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4685   for (i = 0; i < m; i++) {
4686     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4687     Ii = i + rstart;
4688     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4689     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4690   }
4691   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4692   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4693   PetscFunctionReturn(PETSC_SUCCESS);
4694 }
4695 
4696 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4697 {
4698   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4699 
4700   PetscFunctionBegin;
4701   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4702   PetscCall(PetscFree(merge->id_r));
4703   PetscCall(PetscFree(merge->len_s));
4704   PetscCall(PetscFree(merge->len_r));
4705   PetscCall(PetscFree(merge->bi));
4706   PetscCall(PetscFree(merge->bj));
4707   PetscCall(PetscFree(merge->buf_ri[0]));
4708   PetscCall(PetscFree(merge->buf_ri));
4709   PetscCall(PetscFree(merge->buf_rj[0]));
4710   PetscCall(PetscFree(merge->buf_rj));
4711   PetscCall(PetscFree(merge->coi));
4712   PetscCall(PetscFree(merge->coj));
4713   PetscCall(PetscFree(merge->owners_co));
4714   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4715   PetscCall(PetscFree(merge));
4716   PetscFunctionReturn(PETSC_SUCCESS);
4717 }
4718 
4719 #include <../src/mat/utils/freespace.h>
4720 #include <petscbt.h>
4721 
4722 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4723 {
4724   MPI_Comm             comm;
4725   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4726   PetscMPIInt          size, rank, taga, *len_s;
4727   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4728   PetscMPIInt          proc, k;
4729   PetscInt           **buf_ri, **buf_rj;
4730   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4731   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4732   MPI_Request         *s_waits, *r_waits;
4733   MPI_Status          *status;
4734   const MatScalar     *aa, *a_a;
4735   MatScalar          **abuf_r, *ba_i;
4736   Mat_Merge_SeqsToMPI *merge;
4737   PetscContainer       container;
4738 
4739   PetscFunctionBegin;
4740   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4741   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4742 
4743   PetscCallMPI(MPI_Comm_size(comm, &size));
4744   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4745 
4746   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4747   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4748   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4749   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4750   aa = a_a;
4751 
4752   bi     = merge->bi;
4753   bj     = merge->bj;
4754   buf_ri = merge->buf_ri;
4755   buf_rj = merge->buf_rj;
4756 
4757   PetscCall(PetscMalloc1(size, &status));
4758   owners = merge->rowmap->range;
4759   len_s  = merge->len_s;
4760 
4761   /* send and recv matrix values */
4762   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4763   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4764 
4765   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4766   for (proc = 0, k = 0; proc < size; proc++) {
4767     if (!len_s[proc]) continue;
4768     i = owners[proc];
4769     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4770     k++;
4771   }
4772 
4773   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4774   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4775   PetscCall(PetscFree(status));
4776 
4777   PetscCall(PetscFree(s_waits));
4778   PetscCall(PetscFree(r_waits));
4779 
4780   /* insert mat values of mpimat */
4781   PetscCall(PetscMalloc1(N, &ba_i));
4782   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4783 
4784   for (k = 0; k < merge->nrecv; k++) {
4785     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4786     nrows       = *buf_ri_k[k];
4787     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4788     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4789   }
4790 
4791   /* set values of ba */
4792   m = merge->rowmap->n;
4793   for (i = 0; i < m; i++) {
4794     arow = owners[rank] + i;
4795     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4796     bnzi = bi[i + 1] - bi[i];
4797     PetscCall(PetscArrayzero(ba_i, bnzi));
4798 
4799     /* add local non-zero vals of this proc's seqmat into ba */
4800     anzi   = ai[arow + 1] - ai[arow];
4801     aj     = a->j + ai[arow];
4802     aa     = a_a + ai[arow];
4803     nextaj = 0;
4804     for (j = 0; nextaj < anzi; j++) {
4805       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4806         ba_i[j] += aa[nextaj++];
4807       }
4808     }
4809 
4810     /* add received vals into ba */
4811     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4812       /* i-th row */
4813       if (i == *nextrow[k]) {
4814         anzi   = *(nextai[k] + 1) - *nextai[k];
4815         aj     = buf_rj[k] + *nextai[k];
4816         aa     = abuf_r[k] + *nextai[k];
4817         nextaj = 0;
4818         for (j = 0; nextaj < anzi; j++) {
4819           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4820             ba_i[j] += aa[nextaj++];
4821           }
4822         }
4823         nextrow[k]++;
4824         nextai[k]++;
4825       }
4826     }
4827     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4828   }
4829   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4830   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4831   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4832 
4833   PetscCall(PetscFree(abuf_r[0]));
4834   PetscCall(PetscFree(abuf_r));
4835   PetscCall(PetscFree(ba_i));
4836   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4837   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4838   PetscFunctionReturn(PETSC_SUCCESS);
4839 }
4840 
4841 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4842 {
4843   Mat                  B_mpi;
4844   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4845   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4846   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4847   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4848   PetscInt             len, *dnz, *onz, bs, cbs;
4849   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4850   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4851   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4852   MPI_Status          *status;
4853   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4854   PetscBT              lnkbt;
4855   Mat_Merge_SeqsToMPI *merge;
4856   PetscContainer       container;
4857 
4858   PetscFunctionBegin;
4859   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4860 
4861   /* make sure it is a PETSc comm */
4862   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4863   PetscCallMPI(MPI_Comm_size(comm, &size));
4864   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4865 
4866   PetscCall(PetscNew(&merge));
4867   PetscCall(PetscMalloc1(size, &status));
4868 
4869   /* determine row ownership */
4870   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4871   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4872   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4873   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4874   PetscCall(PetscLayoutSetUp(merge->rowmap));
4875   PetscCall(PetscMalloc1(size, &len_si));
4876   PetscCall(PetscMalloc1(size, &merge->len_s));
4877 
4878   m      = merge->rowmap->n;
4879   owners = merge->rowmap->range;
4880 
4881   /* determine the number of messages to send, their lengths */
4882   len_s = merge->len_s;
4883 
4884   len          = 0; /* length of buf_si[] */
4885   merge->nsend = 0;
4886   for (PetscMPIInt proc = 0; proc < size; proc++) {
4887     len_si[proc] = 0;
4888     if (proc == rank) {
4889       len_s[proc] = 0;
4890     } else {
4891       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4892       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4893     }
4894     if (len_s[proc]) {
4895       merge->nsend++;
4896       nrows = 0;
4897       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4898         if (ai[i + 1] > ai[i]) nrows++;
4899       }
4900       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4901       len += len_si[proc];
4902     }
4903   }
4904 
4905   /* determine the number and length of messages to receive for ij-structure */
4906   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4907   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4908 
4909   /* post the Irecv of j-structure */
4910   PetscCall(PetscCommGetNewTag(comm, &tagj));
4911   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4912 
4913   /* post the Isend of j-structure */
4914   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4915 
4916   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4917     if (!len_s[proc]) continue;
4918     i = owners[proc];
4919     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4920     k++;
4921   }
4922 
4923   /* receives and sends of j-structure are complete */
4924   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4925   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4926 
4927   /* send and recv i-structure */
4928   PetscCall(PetscCommGetNewTag(comm, &tagi));
4929   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4930 
4931   PetscCall(PetscMalloc1(len + 1, &buf_s));
4932   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4933   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4934     if (!len_s[proc]) continue;
4935     /* form outgoing message for i-structure:
4936          buf_si[0]:                 nrows to be sent
4937                [1:nrows]:           row index (global)
4938                [nrows+1:2*nrows+1]: i-structure index
4939     */
4940     nrows       = len_si[proc] / 2 - 1;
4941     buf_si_i    = buf_si + nrows + 1;
4942     buf_si[0]   = nrows;
4943     buf_si_i[0] = 0;
4944     nrows       = 0;
4945     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4946       anzi = ai[i + 1] - ai[i];
4947       if (anzi) {
4948         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4949         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4950         nrows++;
4951       }
4952     }
4953     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4954     k++;
4955     buf_si += len_si[proc];
4956   }
4957 
4958   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4959   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4960 
4961   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4962   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4963 
4964   PetscCall(PetscFree(len_si));
4965   PetscCall(PetscFree(len_ri));
4966   PetscCall(PetscFree(rj_waits));
4967   PetscCall(PetscFree2(si_waits, sj_waits));
4968   PetscCall(PetscFree(ri_waits));
4969   PetscCall(PetscFree(buf_s));
4970   PetscCall(PetscFree(status));
4971 
4972   /* compute a local seq matrix in each processor */
4973   /* allocate bi array and free space for accumulating nonzero column info */
4974   PetscCall(PetscMalloc1(m + 1, &bi));
4975   bi[0] = 0;
4976 
4977   /* create and initialize a linked list */
4978   nlnk = N + 1;
4979   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4980 
4981   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4982   len = ai[owners[rank + 1]] - ai[owners[rank]];
4983   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4984 
4985   current_space = free_space;
4986 
4987   /* determine symbolic info for each local row */
4988   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4989 
4990   for (k = 0; k < merge->nrecv; k++) {
4991     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4992     nrows       = *buf_ri_k[k];
4993     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4994     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4995   }
4996 
4997   MatPreallocateBegin(comm, m, n, dnz, onz);
4998   len = 0;
4999   for (i = 0; i < m; i++) {
5000     bnzi = 0;
5001     /* add local non-zero cols of this proc's seqmat into lnk */
5002     arow = owners[rank] + i;
5003     anzi = ai[arow + 1] - ai[arow];
5004     aj   = a->j + ai[arow];
5005     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5006     bnzi += nlnk;
5007     /* add received col data into lnk */
5008     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5009       if (i == *nextrow[k]) {            /* i-th row */
5010         anzi = *(nextai[k] + 1) - *nextai[k];
5011         aj   = buf_rj[k] + *nextai[k];
5012         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5013         bnzi += nlnk;
5014         nextrow[k]++;
5015         nextai[k]++;
5016       }
5017     }
5018     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5019 
5020     /* if free space is not available, make more free space */
5021     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5022     /* copy data into free space, then initialize lnk */
5023     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5024     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5025 
5026     current_space->array += bnzi;
5027     current_space->local_used += bnzi;
5028     current_space->local_remaining -= bnzi;
5029 
5030     bi[i + 1] = bi[i] + bnzi;
5031   }
5032 
5033   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5034 
5035   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5036   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5037   PetscCall(PetscLLDestroy(lnk, lnkbt));
5038 
5039   /* create symbolic parallel matrix B_mpi */
5040   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5041   PetscCall(MatCreate(comm, &B_mpi));
5042   if (n == PETSC_DECIDE) {
5043     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5044   } else {
5045     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5046   }
5047   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5048   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5049   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5050   MatPreallocateEnd(dnz, onz);
5051   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5052 
5053   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5054   B_mpi->assembled = PETSC_FALSE;
5055   merge->bi        = bi;
5056   merge->bj        = bj;
5057   merge->buf_ri    = buf_ri;
5058   merge->buf_rj    = buf_rj;
5059   merge->coi       = NULL;
5060   merge->coj       = NULL;
5061   merge->owners_co = NULL;
5062 
5063   PetscCall(PetscCommDestroy(&comm));
5064 
5065   /* attach the supporting struct to B_mpi for reuse */
5066   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5067   PetscCall(PetscContainerSetPointer(container, merge));
5068   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5069   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5070   PetscCall(PetscContainerDestroy(&container));
5071   *mpimat = B_mpi;
5072 
5073   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5074   PetscFunctionReturn(PETSC_SUCCESS);
5075 }
5076 
5077 /*@
5078   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5079   matrices from each processor
5080 
5081   Collective
5082 
5083   Input Parameters:
5084 + comm   - the communicator the parallel matrix will live on
5085 . seqmat - the input sequential matrix
5086 . m      - number of local rows (or `PETSC_DECIDE`)
5087 . n      - number of local columns (or `PETSC_DECIDE`)
5088 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5089 
5090   Output Parameter:
5091 . mpimat - the parallel matrix generated
5092 
5093   Level: advanced
5094 
5095   Note:
5096   The dimensions of the sequential matrix on each process MUST be the same.
5097   The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI" and will be
5098   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5099 
5100 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5101 @*/
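/*
  A minimal usage sketch (illustrative only; it assumes every rank holds an assembled
  SeqAIJ matrix `seqmat` of identical dimensions):

    Mat C;
    PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));
    // ... change the values (but not the nonzero pattern) of seqmat ...
    PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C));
    PetscCall(MatDestroy(&C));
*/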
5102 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5103 {
5104   PetscMPIInt size;
5105 
5106   PetscFunctionBegin;
5107   PetscCallMPI(MPI_Comm_size(comm, &size));
5108   if (size == 1) {
5109     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5110     if (scall == MAT_INITIAL_MATRIX) {
5111       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5112     } else {
5113       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5114     }
5115     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5116     PetscFunctionReturn(PETSC_SUCCESS);
5117   }
5118   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5119   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5120   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5121   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5122   PetscFunctionReturn(PETSC_SUCCESS);
5123 }
5124 
5125 /*@
5126   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5127 
5128   Not Collective
5129 
5130   Input Parameter:
5131 . A - the matrix
5132 
5133   Output Parameter:
5134 . A_loc - the local sequential matrix generated
5135 
5136   Level: developer
5137 
5138   Notes:
5139   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5140   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()` and
5141   `n` is the global column count obtained with `MatGetSize()`.
5142 
5143   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5144 
5145   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5146 
5147   Destroy the matrix with `MatDestroy()`
5148 
5149 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5150 @*/
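/*
  A minimal usage sketch (illustrative only; A is assumed to be an assembled AIJ matrix):

    Mat Aloc;
    PetscCall(MatAIJGetLocalMat(A, &Aloc));
    // ... work with the local rows of A through Aloc ...
    PetscCall(MatDestroy(&Aloc)); // always destroy; for sequential A only a reference was taken
*/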
5151 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5152 {
5153   PetscBool mpi;
5154 
5155   PetscFunctionBegin;
5156   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5157   if (mpi) {
5158     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5159   } else {
5160     *A_loc = A;
5161     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5162   }
5163   PetscFunctionReturn(PETSC_SUCCESS);
5164 }
5165 
5166 /*@
5167   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5168 
5169   Not Collective
5170 
5171   Input Parameters:
5172 + A     - the matrix
5173 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5174 
5175   Output Parameter:
5176 . A_loc - the local sequential matrix generated
5177 
5178   Level: developer
5179 
5180   Notes:
5181   The matrix is created by taking all of `A`'s local rows and putting them into a sequential
5182   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5183   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5184 
5185   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5186 
5187   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5188   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5189   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5190   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5191 
5192 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5193 @*/
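/*
  A minimal usage sketch (illustrative only; A is assumed to be an assembled MATMPIAIJ matrix):

    Mat Aloc;
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
    // ... later, after A's values (but not its nonzero pattern) have changed ...
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
    PetscCall(MatDestroy(&Aloc));
*/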
5194 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5195 {
5196   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5197   Mat_SeqAIJ        *mat, *a, *b;
5198   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5199   const PetscScalar *aa, *ba, *aav, *bav;
5200   PetscScalar       *ca, *cam;
5201   PetscMPIInt        size;
5202   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5203   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5204   PetscBool          match;
5205 
5206   PetscFunctionBegin;
5207   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5208   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5209   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5210   if (size == 1) {
5211     if (scall == MAT_INITIAL_MATRIX) {
5212       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5213       *A_loc = mpimat->A;
5214     } else if (scall == MAT_REUSE_MATRIX) {
5215       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5216     }
5217     PetscFunctionReturn(PETSC_SUCCESS);
5218   }
5219 
5220   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5221   a  = (Mat_SeqAIJ *)mpimat->A->data;
5222   b  = (Mat_SeqAIJ *)mpimat->B->data;
5223   ai = a->i;
5224   aj = a->j;
5225   bi = b->i;
5226   bj = b->j;
5227   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5228   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5229   aa = aav;
5230   ba = bav;
5231   if (scall == MAT_INITIAL_MATRIX) {
5232     PetscCall(PetscMalloc1(1 + am, &ci));
5233     ci[0] = 0;
5234     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5235     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5236     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5237     k = 0;
5238     for (i = 0; i < am; i++) {
5239       ncols_o = bi[i + 1] - bi[i];
5240       ncols_d = ai[i + 1] - ai[i];
5241       /* off-diagonal portion of A */
5242       for (jo = 0; jo < ncols_o; jo++) {
5243         col = cmap[*bj];
5244         if (col >= cstart) break;
5245         cj[k] = col;
5246         bj++;
5247         ca[k++] = *ba++;
5248       }
5249       /* diagonal portion of A */
5250       for (j = 0; j < ncols_d; j++) {
5251         cj[k]   = cstart + *aj++;
5252         ca[k++] = *aa++;
5253       }
5254       /* off-diagonal portion of A */
5255       for (j = jo; j < ncols_o; j++) {
5256         cj[k]   = cmap[*bj++];
5257         ca[k++] = *ba++;
5258       }
5259     }
5260     /* put together the new matrix */
5261     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5262     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5263     /* Since these are PETSc arrays, change flags to free them as necessary. */
5264     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5265     mat->free_a  = PETSC_TRUE;
5266     mat->free_ij = PETSC_TRUE;
5267     mat->nonew   = 0;
5268   } else if (scall == MAT_REUSE_MATRIX) {
5269     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5270     ci  = mat->i;
5271     cj  = mat->j;
5272     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5273     for (i = 0; i < am; i++) {
5274       /* off-diagonal portion of A */
5275       ncols_o = bi[i + 1] - bi[i];
5276       for (jo = 0; jo < ncols_o; jo++) {
5277         col = cmap[*bj];
5278         if (col >= cstart) break;
5279         *cam++ = *ba++;
5280         bj++;
5281       }
5282       /* diagonal portion of A */
5283       ncols_d = ai[i + 1] - ai[i];
5284       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5285       /* off-diagonal portion of A */
5286       for (j = jo; j < ncols_o; j++) {
5287         *cam++ = *ba++;
5288         bj++;
5289       }
5290     }
5291     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5292   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5293   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5294   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5295   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5296   PetscFunctionReturn(PETSC_SUCCESS);
5297 }
5298 
5299 /*@
5300   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5301   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5302 
5303   Not Collective
5304 
5305   Input Parameters:
5306 + A     - the matrix
5307 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5308 
5309   Output Parameters:
5310 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5311 - A_loc - the local sequential matrix generated
5312 
5313   Level: developer
5314 
5315   Note:
5316   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5317   part, followed by those associated with the off-diagonal part (in its local ordering).
5318 
5319 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5320 @*/
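/*
  A minimal usage sketch (illustrative only; A is assumed to be an assembled MATMPIAIJ matrix).
  The optional IS maps the columns of the merged local matrix back to global column indices:

    Mat Aloc;
    IS  glob;
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &Aloc));
    // columns [0, dn) of Aloc come from the diagonal block, the remaining ones from the off-diagonal block
    PetscCall(ISDestroy(&glob));
    PetscCall(MatDestroy(&Aloc));
*/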
5321 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5322 {
5323   Mat             Ao, Ad;
5324   const PetscInt *cmap;
5325   PetscMPIInt     size;
5326   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5327 
5328   PetscFunctionBegin;
5329   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5330   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5331   if (size == 1) {
5332     if (scall == MAT_INITIAL_MATRIX) {
5333       PetscCall(PetscObjectReference((PetscObject)Ad));
5334       *A_loc = Ad;
5335     } else if (scall == MAT_REUSE_MATRIX) {
5336       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5337     }
5338     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5339     PetscFunctionReturn(PETSC_SUCCESS);
5340   }
5341   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5342   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5343   if (f) {
5344     PetscCall((*f)(A, scall, glob, A_loc));
5345   } else {
5346     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5347     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5348     Mat_SeqAIJ        *c;
5349     PetscInt          *ai = a->i, *aj = a->j;
5350     PetscInt          *bi = b->i, *bj = b->j;
5351     PetscInt          *ci, *cj;
5352     const PetscScalar *aa, *ba;
5353     PetscScalar       *ca;
5354     PetscInt           i, j, am, dn, on;
5355 
5356     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5357     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5358     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5359     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5360     if (scall == MAT_INITIAL_MATRIX) {
5361       PetscInt k;
5362       PetscCall(PetscMalloc1(1 + am, &ci));
5363       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5364       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5365       ci[0] = 0;
5366       for (i = 0, k = 0; i < am; i++) {
5367         const PetscInt ncols_o = bi[i + 1] - bi[i];
5368         const PetscInt ncols_d = ai[i + 1] - ai[i];
5369         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5370         /* diagonal portion of A */
5371         for (j = 0; j < ncols_d; j++, k++) {
5372           cj[k] = *aj++;
5373           ca[k] = *aa++;
5374         }
5375         /* off-diagonal portion of A */
5376         for (j = 0; j < ncols_o; j++, k++) {
5377           cj[k] = dn + *bj++;
5378           ca[k] = *ba++;
5379         }
5380       }
5381       /* put together the new matrix */
5382       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5383       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5384       /* Since these are PETSc arrays, change flags to free them as necessary. */
5385       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5386       c->free_a  = PETSC_TRUE;
5387       c->free_ij = PETSC_TRUE;
5388       c->nonew   = 0;
5389       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5390     } else if (scall == MAT_REUSE_MATRIX) {
5391       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5392       for (i = 0; i < am; i++) {
5393         const PetscInt ncols_d = ai[i + 1] - ai[i];
5394         const PetscInt ncols_o = bi[i + 1] - bi[i];
5395         /* diagonal portion of A */
5396         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5397         /* off-diagonal portion of A */
5398         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5399       }
5400       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5401     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5402     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5403     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5404     if (glob) {
5405       PetscInt cst, *gidx;
5406 
5407       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5408       PetscCall(PetscMalloc1(dn + on, &gidx));
5409       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5410       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5411       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5412     }
5413   }
5414   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5415   PetscFunctionReturn(PETSC_SUCCESS);
5416 }
5417 
5418 /*@C
5419   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5420 
5421   Not Collective
5422 
5423   Input Parameters:
5424 + A     - the matrix
5425 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5426 . row   - index set of rows to extract (or `NULL`)
5427 - col   - index set of columns to extract (or `NULL`)
5428 
5429   Output Parameter:
5430 . A_loc - the local sequential matrix generated
5431 
5432   Level: developer
5433 
5434 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5435 @*/
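/*
  A minimal usage sketch (illustrative only; passing NULL for both index sets selects all local
  rows and all nonzero columns of A):

    Mat Aloc;
    PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &Aloc));
    // ... later, with the same nonzero pattern ...
    PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_REUSE_MATRIX, NULL, NULL, &Aloc));
    PetscCall(MatDestroy(&Aloc));
*/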
5436 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5437 {
5438   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5439   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5440   IS          isrowa, iscola;
5441   Mat        *aloc;
5442   PetscBool   match;
5443 
5444   PetscFunctionBegin;
5445   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5446   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5447   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5448   if (!row) {
5449     start = A->rmap->rstart;
5450     end   = A->rmap->rend;
5451     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5452   } else {
5453     isrowa = *row;
5454   }
5455   if (!col) {
5456     start = A->cmap->rstart;
5457     cmap  = a->garray;
5458     nzA   = a->A->cmap->n;
5459     nzB   = a->B->cmap->n;
5460     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5461     ncols = 0;
5462     for (i = 0; i < nzB; i++) {
5463       if (cmap[i] < start) idx[ncols++] = cmap[i];
5464       else break;
5465     }
5466     imark = i;
5467     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5468     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5469     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5470   } else {
5471     iscola = *col;
5472   }
5473   if (scall != MAT_INITIAL_MATRIX) {
5474     PetscCall(PetscMalloc1(1, &aloc));
5475     aloc[0] = *A_loc;
5476   }
5477   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5478   if (!col) { /* attach global id of condensed columns */
5479     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5480   }
5481   *A_loc = aloc[0];
5482   PetscCall(PetscFree(aloc));
5483   if (!row) PetscCall(ISDestroy(&isrowa));
5484   if (!col) PetscCall(ISDestroy(&iscola));
5485   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5486   PetscFunctionReturn(PETSC_SUCCESS);
5487 }
5488 
5489 /*
5490  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once that row is matched.
5491  * Rows can be local or remote. The routine is designed to be memory scalable so that nothing is based
5492  * on a global size.
5493  * */
5494 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5495 {
5496   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5497   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5498   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5499   PetscMPIInt            owner;
5500   PetscSFNode           *iremote, *oiremote;
5501   const PetscInt        *lrowindices;
5502   PetscSF                sf, osf;
5503   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5504   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5505   MPI_Comm               comm;
5506   ISLocalToGlobalMapping mapping;
5507   const PetscScalar     *pd_a, *po_a;
5508 
5509   PetscFunctionBegin;
5510   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5511   /* plocalsize is the number of roots
5512    * nrows is the number of leaves
5513    * */
5514   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5515   PetscCall(ISGetLocalSize(rows, &nrows));
5516   PetscCall(PetscCalloc1(nrows, &iremote));
5517   PetscCall(ISGetIndices(rows, &lrowindices));
5518   for (i = 0; i < nrows; i++) {
5519     /* Find a remote index and an owner for a row
5520      * The row could be local or remote
5521      * */
5522     owner = 0;
5523     lidx  = 0;
5524     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5525     iremote[i].index = lidx;
5526     iremote[i].rank  = owner;
5527   }
5528   /* Create SF to communicate how many nonzero columns for each row */
5529   PetscCall(PetscSFCreate(comm, &sf));
5530   /* SF will figure out the number of nonzero columns for each row, and their
5531    * offsets
5532    * */
5533   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5534   PetscCall(PetscSFSetFromOptions(sf));
5535   PetscCall(PetscSFSetUp(sf));
5536 
5537   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5538   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5539   PetscCall(PetscCalloc1(nrows, &pnnz));
5540   roffsets[0] = 0;
5541   roffsets[1] = 0;
5542   for (i = 0; i < plocalsize; i++) {
5543     /* diagonal */
5544     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5545     /* off-diagonal */
5546     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5547     /* compute offsets so that we know the relative location of each row */
5548     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5549     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5550   }
5551   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5552   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5553   /* 'r' means root, and 'l' means leaf */
5554   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5555   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5556   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5557   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5558   PetscCall(PetscSFDestroy(&sf));
5559   PetscCall(PetscFree(roffsets));
5560   PetscCall(PetscFree(nrcols));
5561   dntotalcols = 0;
5562   ontotalcols = 0;
5563   ncol        = 0;
5564   for (i = 0; i < nrows; i++) {
5565     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5566     ncol    = PetscMax(pnnz[i], ncol);
5567     /* diagonal */
5568     dntotalcols += nlcols[i * 2 + 0];
5569     /* off-diagonal */
5570     ontotalcols += nlcols[i * 2 + 1];
5571   }
5572   /* We do not need to figure out the exact number of columns
5573    * since all the calculations will be done by going through the raw data
5574    * */
5575   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5576   PetscCall(MatSetUp(*P_oth));
5577   PetscCall(PetscFree(pnnz));
5578   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5579   /* diagonal */
5580   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5581   /* off-diagonal */
5582   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5583   /* diagonal */
5584   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5585   /* off-diagonal */
5586   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5587   dntotalcols = 0;
5588   ontotalcols = 0;
5589   ntotalcols  = 0;
5590   for (i = 0; i < nrows; i++) {
5591     owner = 0;
5592     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5593     /* Set iremote for diag matrix */
5594     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5595       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5596       iremote[dntotalcols].rank  = owner;
5597       /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous arrays */
5598       ilocal[dntotalcols++] = ntotalcols++;
5599     }
5600     /* off-diagonal */
5601     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5602       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5603       oiremote[ontotalcols].rank  = owner;
5604       oilocal[ontotalcols++]      = ntotalcols++;
5605     }
5606   }
5607   PetscCall(ISRestoreIndices(rows, &lrowindices));
5608   PetscCall(PetscFree(loffsets));
5609   PetscCall(PetscFree(nlcols));
5610   PetscCall(PetscSFCreate(comm, &sf));
5611   /* P serves as the roots and P_oth as the leaves
5612    * Diag matrix
5613    * */
5614   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5615   PetscCall(PetscSFSetFromOptions(sf));
5616   PetscCall(PetscSFSetUp(sf));
5617 
5618   PetscCall(PetscSFCreate(comm, &osf));
5619   /* off-diagonal */
5620   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5621   PetscCall(PetscSFSetFromOptions(osf));
5622   PetscCall(PetscSFSetUp(osf));
5623   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5624   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5625   /* operate on the matrix internal data to save memory */
5626   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5627   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5628   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5629   /* Convert to global indices for diag matrix */
5630   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5631   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5632   /* We want P_oth to store global indices */
5633   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5634   /* Use memory scalable approach */
5635   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5636   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5637   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5638   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5639   /* Convert back to local indices */
5640   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5641   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5642   nout = 0;
5643   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5644   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5645   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5646   /* Exchange values */
5647   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5648   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5649   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5650   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5651   /* Stop PETSc from shrinking memory */
5652   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5653   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5654   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5655   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5656   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5657   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5658   PetscCall(PetscSFDestroy(&sf));
5659   PetscCall(PetscSFDestroy(&osf));
5660   PetscFunctionReturn(PETSC_SUCCESS);
5661 }
5662 
5663 /*
5664  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5665  * This supports MPIAIJ and MAIJ
5666  * */
5667 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5668 {
5669   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5670   Mat_SeqAIJ *p_oth;
5671   IS          rows, map;
5672   PetscHMapI  hamp;
5673   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5674   MPI_Comm    comm;
5675   PetscSF     sf, osf;
5676   PetscBool   has;
5677 
5678   PetscFunctionBegin;
5679   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5680   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5681   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5682    *  and then create a submatrix (that often is an overlapping matrix)
5683    * */
5684   if (reuse == MAT_INITIAL_MATRIX) {
5685     /* Use a hash table to figure out unique keys */
5686     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5687     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5688     count = 0;
5689     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5690     for (i = 0; i < a->B->cmap->n; i++) {
5691       key = a->garray[i] / dof;
5692       PetscCall(PetscHMapIHas(hamp, key, &has));
5693       if (!has) {
5694         mapping[i] = count;
5695         PetscCall(PetscHMapISet(hamp, key, count++));
5696       } else {
5697         /* Current 'i' has the same key as the previous entry */
5698         mapping[i] = count - 1;
5699       }
5700     }
5701     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5702     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5703     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5704     PetscCall(PetscCalloc1(htsize, &rowindices));
5705     off = 0;
5706     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5707     PetscCall(PetscHMapIDestroy(&hamp));
5708     PetscCall(PetscSortInt(htsize, rowindices));
5709     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5710     /* In case the matrix was already created but the user wants to recreate it */
5711     PetscCall(MatDestroy(P_oth));
5712     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5713     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5714     PetscCall(ISDestroy(&map));
5715     PetscCall(ISDestroy(&rows));
5716   } else if (reuse == MAT_REUSE_MATRIX) {
5717     /* If the matrix was already created, we simply update its values using the SF objects
5718      * that were attached to the matrix earlier.
5719      */
5720     const PetscScalar *pd_a, *po_a;
5721 
5722     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5723     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5724     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5725     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5726     /* Update values in place */
5727     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5728     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5729     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5730     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5731     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5732     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5733     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5734     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5735   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5736   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5737   PetscFunctionReturn(PETSC_SUCCESS);
5738 }
5739 
5740 /*@C
5741   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5742 
5743   Collective
5744 
5745   Input Parameters:
5746 + A     - the first matrix in `MATMPIAIJ` format
5747 . B     - the second matrix in `MATMPIAIJ` format
5748 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5749 
5750   Output Parameters:
5751 + rowb  - On input, the index set of rows of `B` to extract (or `NULL`); modified on output
5752 . colb  - On input, the index set of columns of `B` to extract (or `NULL`); modified on output
5753 - B_seq - the sequential matrix generated
5754 
5755   Level: developer
5756 
5757 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5758 @*/
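/*
  A minimal usage sketch (illustrative only; A and B are assumed to be MATMPIAIJ matrices with
  compatible layouts). The index sets created on the first call are passed back for reuse and
  destroyed by the caller:

    IS  rowb = NULL, colb = NULL;
    Mat Bseq = NULL;
    PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &Bseq));
    // ... later, after B's values (but not its nonzero pattern) have changed ...
    PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &Bseq));
    PetscCall(ISDestroy(&rowb));
    PetscCall(ISDestroy(&colb));
    PetscCall(MatDestroy(&Bseq));
*/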
5759 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5760 {
5761   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5762   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5763   IS          isrowb, iscolb;
5764   Mat        *bseq = NULL;
5765 
5766   PetscFunctionBegin;
5767   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5768              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5769   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5770 
5771   if (scall == MAT_INITIAL_MATRIX) {
5772     start = A->cmap->rstart;
5773     cmap  = a->garray;
5774     nzA   = a->A->cmap->n;
5775     nzB   = a->B->cmap->n;
5776     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5777     ncols = 0;
5778     for (i = 0; i < nzB; i++) { /* row < local row index */
5779       if (cmap[i] < start) idx[ncols++] = cmap[i];
5780       else break;
5781     }
5782     imark = i;
5783     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5784     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5785     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5786     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5787   } else {
5788     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5789     isrowb = *rowb;
5790     iscolb = *colb;
5791     PetscCall(PetscMalloc1(1, &bseq));
5792     bseq[0] = *B_seq;
5793   }
5794   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5795   *B_seq = bseq[0];
5796   PetscCall(PetscFree(bseq));
5797   if (!rowb) {
5798     PetscCall(ISDestroy(&isrowb));
5799   } else {
5800     *rowb = isrowb;
5801   }
5802   if (!colb) {
5803     PetscCall(ISDestroy(&iscolb));
5804   } else {
5805     *colb = iscolb;
5806   }
5807   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5808   PetscFunctionReturn(PETSC_SUCCESS);
5809 }
5810 
5811 /*
5812     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5813     of the OFF-DIAGONAL portion of local A
5814 
5815     Collective
5816 
5817    Input Parameters:
5818 +    A,B - the matrices in `MATMPIAIJ` format
5819 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5820 
5821    Output Parameters:
5822 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5823 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5824 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5825 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5826 
5827     Developer Note:
5828     This directly accesses information inside the VecScatter associated with the matrix-vector product
5829     for this matrix. This is not desirable.
5830 
5831     Level: developer
5832 
5833 */
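/*
  A minimal usage sketch (illustrative only; A and B are MATMPIAIJ with compatible layouts).
  The work arrays returned on the first call are handed back unchanged when reusing, and are
  eventually freed by the caller:

    PetscInt  *startsj_s = NULL, *startsj_r = NULL;
    MatScalar *bufa = NULL;
    Mat        Both = NULL;
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &Both));
    // ... later, after B's values (but not its nonzero pattern) have changed ...
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &Both));
    PetscCall(PetscFree2(startsj_s, startsj_r));
    PetscCall(PetscFree(bufa));
    PetscCall(MatDestroy(&Both));
*/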
5834 
5835 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5836 {
5837   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5838   VecScatter         ctx;
5839   MPI_Comm           comm;
5840   const PetscMPIInt *rprocs, *sprocs;
5841   PetscMPIInt        nrecvs, nsends;
5842   const PetscInt    *srow, *rstarts, *sstarts;
5843   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5844   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5845   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5846   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5847   PetscMPIInt        size, tag, rank, nreqs;
5848 
5849   PetscFunctionBegin;
5850   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5851   PetscCallMPI(MPI_Comm_size(comm, &size));
5852 
5853   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5854              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5855   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5856   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5857 
5858   if (size == 1) {
5859     startsj_s = NULL;
5860     bufa_ptr  = NULL;
5861     *B_oth    = NULL;
5862     PetscFunctionReturn(PETSC_SUCCESS);
5863   }
5864 
5865   ctx = a->Mvctx;
5866   tag = ((PetscObject)ctx)->tag;
5867 
5868   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5869   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5870   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5871   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5872   PetscCall(PetscMalloc1(nreqs, &reqs));
5873   rwaits = reqs;
5874   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5875 
5876   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5877   if (scall == MAT_INITIAL_MATRIX) {
5878     /* i-array */
5879     /*  post receives */
5880     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5881     for (i = 0; i < nrecvs; i++) {
5882       rowlen = rvalues + rstarts[i] * rbs;
5883       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5884       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5885     }
5886 
5887     /* pack the outgoing message */
5888     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5889 
5890     sstartsj[0] = 0;
5891     rstartsj[0] = 0;
5892     len         = 0; /* total length of j or a array to be sent */
5893     if (nsends) {
5894       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5895       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5896     }
5897     for (i = 0; i < nsends; i++) {
5898       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5899       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5900       for (j = 0; j < nrows; j++) {
5901         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5902         for (l = 0; l < sbs; l++) {
5903           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5904 
5905           rowlen[j * sbs + l] = ncols;
5906 
5907           len += ncols;
5908           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5909         }
5910         k++;
5911       }
5912       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5913 
5914       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5915     }
5916     /* recvs and sends of i-array are completed */
5917     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5918     PetscCall(PetscFree(svalues));
5919 
5920     /* allocate buffers for sending j and a arrays */
5921     PetscCall(PetscMalloc1(len + 1, &bufj));
5922     PetscCall(PetscMalloc1(len + 1, &bufa));
5923 
5924     /* create i-array of B_oth */
5925     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5926 
5927     b_othi[0] = 0;
5928     len       = 0; /* total length of j or a array to be received */
5929     k         = 0;
5930     for (i = 0; i < nrecvs; i++) {
5931       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5932       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5933       for (j = 0; j < nrows; j++) {
5934         b_othi[k + 1] = b_othi[k] + rowlen[j];
5935         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5936         k++;
5937       }
5938       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5939     }
5940     PetscCall(PetscFree(rvalues));
5941 
5942     /* allocate space for j and a arrays of B_oth */
5943     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5944     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5945 
5946     /* j-array */
5947     /*  post receives of j-array */
5948     for (i = 0; i < nrecvs; i++) {
5949       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5950       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5951     }
5952 
5953     /* pack the outgoing message j-array */
5954     if (nsends) k = sstarts[0];
5955     for (i = 0; i < nsends; i++) {
5956       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5957       bufJ  = bufj + sstartsj[i];
5958       for (j = 0; j < nrows; j++) {
5959         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5960         for (ll = 0; ll < sbs; ll++) {
5961           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5962           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5963           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5964         }
5965       }
5966       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5967     }
5968 
5969     /* recvs and sends of j-array are completed */
5970     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5971   } else if (scall == MAT_REUSE_MATRIX) {
5972     sstartsj = *startsj_s;
5973     rstartsj = *startsj_r;
5974     bufa     = *bufa_ptr;
5975     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5976   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse %d", (int)scall);
5977 
5978   /* a-array */
5979   /*  post receives of a-array */
5980   for (i = 0; i < nrecvs; i++) {
5981     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5982     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5983   }
5984 
5985   /* pack the outgoing message a-array */
5986   if (nsends) k = sstarts[0];
5987   for (i = 0; i < nsends; i++) {
5988     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5989     bufA  = bufa + sstartsj[i];
5990     for (j = 0; j < nrows; j++) {
5991       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5992       for (ll = 0; ll < sbs; ll++) {
5993         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5994         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5995         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5996       }
5997     }
5998     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5999   }
6000   /* recvs and sends of a-array are completed */
6001   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6002   PetscCall(PetscFree(reqs));
6003 
6004   if (scall == MAT_INITIAL_MATRIX) {
6005     Mat_SeqAIJ *b_oth;
6006 
6007     /* put together the new matrix */
6008     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6009 
6010     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6011     /* Since these are PETSc arrays, change flags to free them as necessary. */
6012     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6013     b_oth->free_a  = PETSC_TRUE;
6014     b_oth->free_ij = PETSC_TRUE;
6015     b_oth->nonew   = 0;
6016 
6017     PetscCall(PetscFree(bufj));
6018     if (!startsj_s || !bufa_ptr) {
6019       PetscCall(PetscFree2(sstartsj, rstartsj));
6020       PetscCall(PetscFree(bufa_ptr));
6021     } else {
6022       *startsj_s = sstartsj;
6023       *startsj_r = rstartsj;
6024       *bufa_ptr  = bufa;
6025     }
6026   } else if (scall == MAT_REUSE_MATRIX) {
6027     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6028   }
6029 
6030   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6031   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6032   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6033   PetscFunctionReturn(PETSC_SUCCESS);
6034 }
6035 
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6039 #if defined(PETSC_HAVE_MKL_SPARSE)
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6041 #endif
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6044 #if defined(PETSC_HAVE_ELEMENTAL)
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_SCALAPACK)
6048 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 #if defined(PETSC_HAVE_HYPRE)
6051 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6052 #endif
6053 #if defined(PETSC_HAVE_CUDA)
6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6055 #endif
6056 #if defined(PETSC_HAVE_HIP)
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6058 #endif
6059 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6061 #endif
6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6063 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6064 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6065 
6066 /*
6067     Computes (B'*A')' since computing B*A directly is untenable
6068 
6069                n                       p                          p
6070         [             ]       [             ]         [                 ]
6071       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6072         [             ]       [             ]         [                 ]
6073 
6074 */
6075 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6076 {
6077   Mat At, Bt, Ct;
6078 
6079   PetscFunctionBegin;
6080   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6081   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6082   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6083   PetscCall(MatDestroy(&At));
6084   PetscCall(MatDestroy(&Bt));
6085   PetscCall(MatTransposeSetPrecursor(Ct, C));
6086   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6087   PetscCall(MatDestroy(&Ct));
6088   PetscFunctionReturn(PETSC_SUCCESS);
6089 }
6090 
6091 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6092 {
6093   PetscBool cisdense;
6094 
6095   PetscFunctionBegin;
6096   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6097   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6098   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6099   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6100   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6101   PetscCall(MatSetUp(C));
6102 
6103   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6104   PetscFunctionReturn(PETSC_SUCCESS);
6105 }
6106 
6107 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6108 {
6109   Mat_Product *product = C->product;
6110   Mat          A = product->A, B = product->B;
6111 
6112   PetscFunctionBegin;
6113   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6114              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6115   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6116   C->ops->productsymbolic = MatProductSymbolic_AB;
6117   PetscFunctionReturn(PETSC_SUCCESS);
6118 }
6119 
6120 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6121 {
6122   Mat_Product *product = C->product;
6123 
6124   PetscFunctionBegin;
6125   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6126   PetscFunctionReturn(PETSC_SUCCESS);
6127 }
6128 
6129 /*
6130    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6131 
6132   Input Parameters:
6133 
6134     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6135     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6136 
6137     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6138 
6139     For Set1, j1[] contains column indices of the nonzeros.
6140     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6141     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6142     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6143 
6144     Similar for Set2.
6145 
6146     This routine merges the two sets of nonzeros row by row and removes repeats.
6147 
6148   Output Parameters: (memory is allocated by the caller)
6149 
6150     i[],j[]: the CSR of the merged matrix, which has m rows.
6151     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6152     imap2[]: similar to imap1[], but for Set2.
6153     Note we order nonzeros row-by-row and from left to right.
6154 */
6155 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6156 {
6157   PetscInt   r, m; /* Row index of mat */
6158   PetscCount t, t1, t2, b1, e1, b2, e2;
6159 
6160   PetscFunctionBegin;
6161   PetscCall(MatGetLocalSize(mat, &m, NULL));
6162   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
6163   i[0]        = 0;
6164   for (r = 0; r < m; r++) { /* Do row by row merging */
6165     b1 = rowBegin1[r];
6166     e1 = rowEnd1[r];
6167     b2 = rowBegin2[r];
6168     e2 = rowEnd2[r];
6169     while (b1 < e1 && b2 < e2) {
6170       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6171         j[t]      = j1[b1];
6172         imap1[t1] = t;
6173         imap2[t2] = t;
6174         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6175         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6176         t1++;
6177         t2++;
6178         t++;
6179       } else if (j1[b1] < j2[b2]) {
6180         j[t]      = j1[b1];
6181         imap1[t1] = t;
6182         b1 += jmap1[t1 + 1] - jmap1[t1];
6183         t1++;
6184         t++;
6185       } else {
6186         j[t]      = j2[b2];
6187         imap2[t2] = t;
6188         b2 += jmap2[t2 + 1] - jmap2[t2];
6189         t2++;
6190         t++;
6191       }
6192     }
6193     /* Merge the remaining in either j1[] or j2[] */
6194     while (b1 < e1) {
6195       j[t]      = j1[b1];
6196       imap1[t1] = t;
6197       b1 += jmap1[t1 + 1] - jmap1[t1];
6198       t1++;
6199       t++;
6200     }
6201     while (b2 < e2) {
6202       j[t]      = j2[b2];
6203       imap2[t2] = t;
6204       b2 += jmap2[t2 + 1] - jmap2[t2];
6205       t2++;
6206       t++;
6207     }
6208     PetscCall(PetscIntCast(t, i + r + 1));
6209   }
6210   PetscFunctionReturn(PETSC_SUCCESS);
6211 }
6212 
6213 /*
6214   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6215 
6216   Input Parameters:
6217     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6218     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6219       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6220 
6221       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6222       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6223 
6224   Output Parameters:
6225     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6226     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6227       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6228       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6229 
6230     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6231       Atot: number of entries belonging to the diagonal block.
6232       Annz: number of unique nonzeros belonging to the diagonal block.
6233       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6234         repeats (i.e., same 'i,j' pair).
6235       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6236         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6240 
6241     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6242 
6243     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6244 */
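/*
  A small worked illustration (hypothetical numbers). Suppose the local column ownership range
  is [cstart,cend) = [10,20) and one local row receives entries with global columns
  j = {25, 12, 12, 3} (unsorted, with one repeat). After the split for that row:
    diagonal block    : unique column {12}, 2 repeats  -> contributes 2 to Atot and 1 to Annz
    off-diagonal block: unique columns {3, 25}         -> contributes 2 to Btot and 2 to Bnnz
  and rowBegin/rowMid/rowEnd bracket the sorted entries of j[] as [diag | offdiag].
*/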
6245 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6246 {
6247   PetscInt    cstart, cend, rstart, rend, row, col;
6248   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6249   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6250   PetscCount  k, m, p, q, r, s, mid;
6251   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6252 
6253   PetscFunctionBegin;
6254   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6255   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6256   m = rend - rstart;
6257 
6258   /* Skip negative rows */
6259   for (k = 0; k < n; k++)
6260     if (i[k] >= 0) break;
6261 
6262   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6263      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6264   */
6265   while (k < n) {
6266     row = i[k];
6267     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6268     for (s = k; s < n; s++)
6269       if (i[s] != row) break;
6270 
6271     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6272     for (p = k; p < s; p++) {
6273       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6274       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6275     }
6276     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6277     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6278     rowBegin[row - rstart] = k;
6279     rowMid[row - rstart]   = mid;
6280     rowEnd[row - rstart]   = s;
6281 
6282     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6283     Atot += mid - k;
6284     Btot += s - mid;
6285 
6286     /* Count unique nonzeros of this diag row */
6287     for (p = k; p < mid;) {
6288       col = j[p];
6289       do {
6290         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6291         p++;
6292       } while (p < mid && j[p] == col);
6293       Annz++;
6294     }
6295 
6296     /* Count unique nonzeros of this offdiag row */
6297     for (p = mid; p < s;) {
6298       col = j[p];
6299       do {
6300         p++;
6301       } while (p < s && j[p] == col);
6302       Bnnz++;
6303     }
6304     k = s;
6305   }
6306 
6307   /* Allocation according to Atot, Btot, Annz, Bnnz */
6308   PetscCall(PetscMalloc1(Atot, &Aperm));
6309   PetscCall(PetscMalloc1(Btot, &Bperm));
6310   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6311   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6312 
6313   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6314   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6315   for (r = 0; r < m; r++) {
6316     k   = rowBegin[r];
6317     mid = rowMid[r];
6318     s   = rowEnd[r];
6319     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6320     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6321     Atot += mid - k;
6322     Btot += s - mid;
6323 
6324     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6325     for (p = k; p < mid;) {
6326       col = j[p];
6327       q   = p;
6328       do {
6329         p++;
6330       } while (p < mid && j[p] == col);
6331       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6332       Annz++;
6333     }
6334 
6335     for (p = mid; p < s;) {
6336       col = j[p];
6337       q   = p;
6338       do {
6339         p++;
6340       } while (p < s && j[p] == col);
6341       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6342       Bnnz++;
6343     }
6344   }
6345   /* Output */
6346   *Aperm_ = Aperm;
6347   *Annz_  = Annz;
6348   *Atot_  = Atot;
6349   *Ajmap_ = Ajmap;
6350   *Bperm_ = Bperm;
6351   *Bnnz_  = Bnnz;
6352   *Btot_  = Btot;
6353   *Bjmap_ = Bjmap;
6354   PetscFunctionReturn(PETSC_SUCCESS);
6355 }
6356 
6357 /*
6358   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6359 
6360   Input Parameters:
6361     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6362     nnz:  number of unique nonzeros in the merged matrix
6363     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6364     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6365 
6366   Output Parameter: (memory is allocated by the caller)
6367     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6368 
6369   Example:
6370     nnz1 = 4
6371     nnz  = 6
6372     imap = [1,3,4,5]
6373     jmap = [0,3,5,6,7]
6374    then,
6375     jmap_new = [0,0,3,3,5,6,7]
6376 */
6377 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6378 {
6379   PetscCount k, p;
6380 
6381   PetscFunctionBegin;
6382   jmap_new[0] = 0;
6383   p           = nnz;                /* p loops over jmap_new[] backwards */
6384   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6385     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6386   }
6387   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6388   PetscFunctionReturn(PETSC_SUCCESS);
6389 }
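/* Tracing the example above (explanatory note added for clarity): the backward sweep sets jmap_new[6] = jmap[4] = 7,
   jmap_new[5] = jmap[3] = 6, jmap_new[4] = jmap[2] = 5 and jmap_new[3] = jmap_new[2] = jmap[1] = 3; the final loop
   fills the remaining leading slots with jmap[0] = 0, giving jmap_new = [0,0,3,3,5,6,7]. */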
6390 
6391 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6392 {
6393   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6394 
6395   PetscFunctionBegin;
6396   PetscCall(PetscSFDestroy(&coo->sf));
6397   PetscCall(PetscFree(coo->Aperm1));
6398   PetscCall(PetscFree(coo->Bperm1));
6399   PetscCall(PetscFree(coo->Ajmap1));
6400   PetscCall(PetscFree(coo->Bjmap1));
6401   PetscCall(PetscFree(coo->Aimap2));
6402   PetscCall(PetscFree(coo->Bimap2));
6403   PetscCall(PetscFree(coo->Aperm2));
6404   PetscCall(PetscFree(coo->Bperm2));
6405   PetscCall(PetscFree(coo->Ajmap2));
6406   PetscCall(PetscFree(coo->Bjmap2));
6407   PetscCall(PetscFree(coo->Cperm1));
6408   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6409   PetscCall(PetscFree(coo));
6410   PetscFunctionReturn(PETSC_SUCCESS);
6411 }
6412 
6413 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6414 {
6415   MPI_Comm             comm;
6416   PetscMPIInt          rank, size;
6417   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6418   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6419   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6420   PetscContainer       container;
6421   MatCOOStruct_MPIAIJ *coo;
6422 
6423   PetscFunctionBegin;
6424   PetscCall(PetscFree(mpiaij->garray));
6425   PetscCall(VecDestroy(&mpiaij->lvec));
6426 #if defined(PETSC_USE_CTABLE)
6427   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6428 #else
6429   PetscCall(PetscFree(mpiaij->colmap));
6430 #endif
6431   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6432   mat->assembled     = PETSC_FALSE;
6433   mat->was_assembled = PETSC_FALSE;
6434 
6435   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6436   PetscCallMPI(MPI_Comm_size(comm, &size));
6437   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6438   PetscCall(PetscLayoutSetUp(mat->rmap));
6439   PetscCall(PetscLayoutSetUp(mat->cmap));
6440   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6441   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6442   PetscCall(MatGetLocalSize(mat, &m, &n));
6443   PetscCall(MatGetSize(mat, &M, &N));
6444 
6445   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6446   /* entries come first, then local rows, then remote rows.                     */
6447   PetscCount n1 = coo_n, *perm1;
6448   PetscInt  *i1 = coo_i, *j1 = coo_j;
6449 
6450   PetscCall(PetscMalloc1(n1, &perm1));
6451   for (k = 0; k < n1; k++) perm1[k] = k;
6452 
6453   /* Manipulate indices so that entries with negative row or col indices will have smallest
6454      row indices, local entries will have greater but negative row indices, and remote entries
6455      will have positive row indices.
6456   */
6457   for (k = 0; k < n1; k++) {
6458     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6459     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6460     else {
6461       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6462       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6463     }
6464   }
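  /* Illustrative sketch of the shift above (the numbers are assumed, not from this source): with rstart = 10 and
     rend = 20, an entry with a negative row or column index becomes PETSC_INT_MIN, local row 12 becomes
     12 - PETSC_INT_MAX (negative, but greater than PETSC_INT_MIN), and remote row 25 keeps the value 25; sorting by
     these manipulated row indices therefore orders the entries as ignored, then local, then remote. */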
6465 
6466   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6467   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6468 
6469   /* Advance k to the first entry we need to take care of */
6470   for (k = 0; k < n1; k++)
6471     if (i1[k] > PETSC_INT_MIN) break;
6472   PetscCount i1start = k;
6473 
6474   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6475   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6476 
6477   /*           Send remote rows to their owner                                  */
6478   /* Find which rows should be sent to which remote ranks*/
6479   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6480   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6481   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6482   const PetscInt *ranges;
6483   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6484 
6485   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6486   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6487   for (k = rem; k < n1;) {
6488     PetscMPIInt owner;
6489     PetscInt    firstRow, lastRow;
6490 
6491     /* Locate a row range */
6492     firstRow = i1[k]; /* first row of this owner */
6493     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6494     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6495 
6496     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6497     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6498 
6499     /* All entries in [k,p) belong to this remote owner */
6500     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6501       PetscMPIInt *sendto2;
6502       PetscInt    *nentries2;
6503       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6504 
6505       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6506       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6507       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6508       PetscCall(PetscFree2(sendto, nentries));
6509       sendto   = sendto2;
6510       nentries = nentries2;
6511       maxNsend = maxNsend2;
6512     }
6513     sendto[nsend] = owner;
6514     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6515     nsend++;
6516     k = p;
6517   }
6518 
6519   /* Build 1st SF to know offsets on remote to send data */
6520   PetscSF      sf1;
6521   PetscInt     nroots = 1, nroots2 = 0;
6522   PetscInt     nleaves = nsend, nleaves2 = 0;
6523   PetscInt    *offsets;
6524   PetscSFNode *iremote;
6525 
6526   PetscCall(PetscSFCreate(comm, &sf1));
6527   PetscCall(PetscMalloc1(nsend, &iremote));
6528   PetscCall(PetscMalloc1(nsend, &offsets));
6529   for (k = 0; k < nsend; k++) {
6530     iremote[k].rank  = sendto[k];
6531     iremote[k].index = 0;
6532     nleaves2 += nentries[k];
6533     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6534   }
6535   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6536   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6537   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6538   PetscCall(PetscSFDestroy(&sf1));
6539   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
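  /* Explanatory note (added for clarity): after the fetch-and-op, nroots2 on each rank equals the total number of
     entries it will receive from all senders, and offsets[k] is the starting position of this rank's k-th batch in
     the receive space of rank sendto[k]; both are used to build the graph of the 2nd SF below. */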
6540 
6541   /* Build 2nd SF to send remote COOs to their owner */
6542   PetscSF sf2;
6543   nroots  = nroots2;
6544   nleaves = nleaves2;
6545   PetscCall(PetscSFCreate(comm, &sf2));
6546   PetscCall(PetscSFSetFromOptions(sf2));
6547   PetscCall(PetscMalloc1(nleaves, &iremote));
6548   p = 0;
6549   for (k = 0; k < nsend; k++) {
6550     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6551     for (q = 0; q < nentries[k]; q++, p++) {
6552       iremote[p].rank = sendto[k];
6553       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6554     }
6555   }
6556   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6557 
6558   /* Send the remote COOs to their owner */
6559   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6560   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6561   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6562   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6563   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6564   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6565   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6566   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6567   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6568   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6569   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6570 
6571   PetscCall(PetscFree(offsets));
6572   PetscCall(PetscFree2(sendto, nentries));
6573 
6574   /* Sort received COOs by row along with the permutation array     */
6575   for (k = 0; k < n2; k++) perm2[k] = k;
6576   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6577 
6578   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6579   PetscCount *Cperm1;
6580   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6581   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6582   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6583   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6584 
6585   /* Support for HYPRE matrices, kind of a hack.
6586      Swap min column with diagonal so that diagonal values will go first */
6587   PetscBool hypre;
6588   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6589   if (hypre) {
6590     PetscInt *minj;
6591     PetscBT   hasdiag;
6592 
6593     PetscCall(PetscBTCreate(m, &hasdiag));
6594     PetscCall(PetscMalloc1(m, &minj));
6595     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6596     for (k = i1start; k < rem; k++) {
6597       if (j1[k] < cstart || j1[k] >= cend) continue;
6598       const PetscInt rindex = i1[k] - rstart;
6599       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6600       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6601     }
6602     for (k = 0; k < n2; k++) {
6603       if (j2[k] < cstart || j2[k] >= cend) continue;
6604       const PetscInt rindex = i2[k] - rstart;
6605       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6606       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6607     }
6608     for (k = i1start; k < rem; k++) {
6609       const PetscInt rindex = i1[k] - rstart;
6610       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6611       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6612       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6613     }
6614     for (k = 0; k < n2; k++) {
6615       const PetscInt rindex = i2[k] - rstart;
6616       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6617       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6618       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6619     }
6620     PetscCall(PetscBTDestroy(&hasdiag));
6621     PetscCall(PetscFree(minj));
6622   }
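  /* Illustrative sketch of the swap above (the numbers are assumed, not from this source): if local row 3 of the
     diagonal block holds columns {cstart+1, cstart+3}, the diagonal entry cstart+3 is relabeled cstart+1 and the
     former minimum column cstart+1 is relabeled cstart+3, so that after the rows are later sorted by column the
     value destined for the diagonal occupies the first slot of the row, which is what hypre expects. */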
6623 
6624   /* Split local COOs and received COOs into diag/offdiag portions */
6625   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6626   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6627   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6628   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6629   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6630   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6631 
6632   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6633   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6634   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6635   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6636 
6637   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6638   PetscInt *Ai, *Bi;
6639   PetscInt *Aj, *Bj;
6640 
6641   PetscCall(PetscMalloc1(m + 1, &Ai));
6642   PetscCall(PetscMalloc1(m + 1, &Bi));
6643   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6644   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6645 
6646   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6647   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6648   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6649   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6650   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6651 
6652   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6653   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6654 
6655   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6656   /* expect nonzeros in A/B most likely have local contributing entries        */
6657   PetscInt    Annz = Ai[m];
6658   PetscInt    Bnnz = Bi[m];
6659   PetscCount *Ajmap1_new, *Bjmap1_new;
6660 
6661   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6662   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6663 
6664   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6665   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6666 
6667   PetscCall(PetscFree(Aimap1));
6668   PetscCall(PetscFree(Ajmap1));
6669   PetscCall(PetscFree(Bimap1));
6670   PetscCall(PetscFree(Bjmap1));
6671   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6672   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6673   PetscCall(PetscFree(perm1));
6674   PetscCall(PetscFree3(i2, j2, perm2));
6675 
6676   Ajmap1 = Ajmap1_new;
6677   Bjmap1 = Bjmap1_new;
6678 
6679   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6680   if (Annz < Annz1 + Annz2) {
6681     PetscInt *Aj_new;
6682     PetscCall(PetscMalloc1(Annz, &Aj_new));
6683     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6684     PetscCall(PetscFree(Aj));
6685     Aj = Aj_new;
6686   }
6687 
6688   if (Bnnz < Bnnz1 + Bnnz2) {
6689     PetscInt *Bj_new;
6690     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6691     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6692     PetscCall(PetscFree(Bj));
6693     Bj = Bj_new;
6694   }
6695 
6696   /* Create new submatrices for on-process and off-process coupling                  */
6697   PetscScalar     *Aa, *Ba;
6698   MatType          rtype;
6699   Mat_SeqAIJ      *a, *b;
6700   PetscObjectState state;
6701   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6702   PetscCall(PetscCalloc1(Bnnz, &Ba));
6703   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6704   if (cstart) {
6705     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6706   }
6707 
6708   PetscCall(MatGetRootType_Private(mat, &rtype));
6709 
6710   MatSeqXAIJGetOptions_Private(mpiaij->A);
6711   PetscCall(MatDestroy(&mpiaij->A));
6712   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6713   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6714   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6715 
6716   MatSeqXAIJGetOptions_Private(mpiaij->B);
6717   PetscCall(MatDestroy(&mpiaij->B));
6718   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6719   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6720   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6721 
6722   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6723   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6724   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6725   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6726 
6727   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6728   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6729   a->free_a  = PETSC_TRUE;
6730   a->free_ij = PETSC_TRUE;
6731   b->free_a  = PETSC_TRUE;
6732   b->free_ij = PETSC_TRUE;
6733   a->maxnz   = a->nz;
6734   b->maxnz   = b->nz;
6735 
6736   /* conversion must happen AFTER multiply setup */
6737   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6738   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6739   PetscCall(VecDestroy(&mpiaij->lvec));
6740   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6741 
6742   // Put the COO struct in a container and then attach that to the matrix
6743   PetscCall(PetscMalloc1(1, &coo));
6744   coo->n       = coo_n;
6745   coo->sf      = sf2;
6746   coo->sendlen = nleaves;
6747   coo->recvlen = nroots;
6748   coo->Annz    = Annz;
6749   coo->Bnnz    = Bnnz;
6750   coo->Annz2   = Annz2;
6751   coo->Bnnz2   = Bnnz2;
6752   coo->Atot1   = Atot1;
6753   coo->Atot2   = Atot2;
6754   coo->Btot1   = Btot1;
6755   coo->Btot2   = Btot2;
6756   coo->Ajmap1  = Ajmap1;
6757   coo->Aperm1  = Aperm1;
6758   coo->Bjmap1  = Bjmap1;
6759   coo->Bperm1  = Bperm1;
6760   coo->Aimap2  = Aimap2;
6761   coo->Ajmap2  = Ajmap2;
6762   coo->Aperm2  = Aperm2;
6763   coo->Bimap2  = Bimap2;
6764   coo->Bjmap2  = Bjmap2;
6765   coo->Bperm2  = Bperm2;
6766   coo->Cperm1  = Cperm1;
6767   // Allocate in preallocation. If not used, it has zero cost on host
6768   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6769   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6770   PetscCall(PetscContainerSetPointer(container, coo));
6771   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6772   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6773   PetscCall(PetscContainerDestroy(&container));
6774   PetscFunctionReturn(PETSC_SUCCESS);
6775 }
6776 
6777 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6778 {
6779   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6780   Mat                  A = mpiaij->A, B = mpiaij->B;
6781   PetscScalar         *Aa, *Ba;
6782   PetscScalar         *sendbuf, *recvbuf;
6783   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6784   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6785   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6786   const PetscCount    *Cperm1;
6787   PetscContainer       container;
6788   MatCOOStruct_MPIAIJ *coo;
6789 
6790   PetscFunctionBegin;
6791   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6792   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6793   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6794   sendbuf = coo->sendbuf;
6795   recvbuf = coo->recvbuf;
6796   Ajmap1  = coo->Ajmap1;
6797   Ajmap2  = coo->Ajmap2;
6798   Aimap2  = coo->Aimap2;
6799   Bjmap1  = coo->Bjmap1;
6800   Bjmap2  = coo->Bjmap2;
6801   Bimap2  = coo->Bimap2;
6802   Aperm1  = coo->Aperm1;
6803   Aperm2  = coo->Aperm2;
6804   Bperm1  = coo->Bperm1;
6805   Bperm2  = coo->Bperm2;
6806   Cperm1  = coo->Cperm1;
6807 
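  /* Explanatory note (added for clarity): in the loops below, the i-th unique nonzero of the diagonal block A
     accumulates the local input values v[Aperm1[k]] for k in [Ajmap1[i], Ajmap1[i+1]); received remote values are
     accumulated into Aa[Aimap2[i]] analogously through Ajmap2[]/Aperm2[]. The off-diagonal block B uses the
     corresponding B* arrays. */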
6808   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6809   PetscCall(MatSeqAIJGetArray(B, &Ba));
6810 
6811   /* Pack entries to be sent to remote */
6812   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6813 
6814   /* Send remote entries to their owner and overlap the communication with local computation */
6815   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6816   /* Add local entries to A and B */
6817   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6818     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6819     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6820     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6821   }
6822   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6823     PetscScalar sum = 0.0;
6824     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6825     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6826   }
6827   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6828 
6829   /* Add received remote entries to A and B */
6830   for (PetscCount i = 0; i < coo->Annz2; i++) {
6831     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6832   }
6833   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6834     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6835   }
6836   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6837   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6838   PetscFunctionReturn(PETSC_SUCCESS);
6839 }
6840 
6841 /*MC
6842    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6843 
6844    Options Database Keys:
6845 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6846 
6847    Level: beginner
6848 
6849    Notes:
6850    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6851     in this case the values associated with the rows and columns one passes in are set to zero
6852     in the matrix.
6853 
6854     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6855     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6856 
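   A minimal creation sketch (added for illustration; the global size and preallocation values below are assumed):
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATMPIAIJ));
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
   /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
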
6857 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6858 M*/
6859 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6860 {
6861   Mat_MPIAIJ *b;
6862   PetscMPIInt size;
6863 
6864   PetscFunctionBegin;
6865   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6866 
6867   PetscCall(PetscNew(&b));
6868   B->data       = (void *)b;
6869   B->ops[0]     = MatOps_Values;
6870   B->assembled  = PETSC_FALSE;
6871   B->insertmode = NOT_SET_VALUES;
6872   b->size       = size;
6873 
6874   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6875 
6876   /* build cache for off array entries formed */
6877   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6878 
6879   b->donotstash  = PETSC_FALSE;
6880   b->colmap      = NULL;
6881   b->garray      = NULL;
6882   b->roworiented = PETSC_TRUE;
6883 
6884   /* stuff used for matrix vector multiply */
6885   b->lvec  = NULL;
6886   b->Mvctx = NULL;
6887 
6888   /* stuff for MatGetRow() */
6889   b->rowindices   = NULL;
6890   b->rowvalues    = NULL;
6891   b->getrowactive = PETSC_FALSE;
6892 
6893   /* flexible pointer used in CUSPARSE classes */
6894   b->spptr = NULL;
6895 
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6906 #if defined(PETSC_HAVE_CUDA)
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6908 #endif
6909 #if defined(PETSC_HAVE_HIP)
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6911 #endif
6912 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6914 #endif
6915 #if defined(PETSC_HAVE_MKL_SPARSE)
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6917 #endif
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6922 #if defined(PETSC_HAVE_ELEMENTAL)
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6924 #endif
6925 #if defined(PETSC_HAVE_SCALAPACK)
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6927 #endif
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6930 #if defined(PETSC_HAVE_HYPRE)
6931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6933 #endif
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6938   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6939   PetscFunctionReturn(PETSC_SUCCESS);
6940 }
6941 
6942 /*@
6943   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6944   and "off-diagonal" part of the matrix in CSR format.
6945 
6946   Collective
6947 
6948   Input Parameters:
6949 + comm - MPI communicator
6950 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6951 . n    - This value should be the same as the local size used in creating the
6952          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6953          calculated if `N` is given) For square matrices `n` is almost always `m`.
6954 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6955 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6956 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6957 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6958 . a    - matrix values
6959 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6960 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6961 - oa   - matrix values
6962 
6963   Output Parameter:
6964 . mat - the matrix
6965 
6966   Level: advanced
6967 
6968   Notes:
6969   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6970   must free the arrays once the matrix has been destroyed and not before.
6971 
6972   The `i` and `j` indices are 0 based
6973 
6974   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6975 
6976   This sets local rows and cannot be used to set off-processor values.
6977 
6978   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6979   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6980   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6981   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6982   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6983   communication if it is known that only local entries will be set.
6984 
6985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6986           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6987 @*/
6988 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6989 {
6990   Mat_MPIAIJ *maij;
6991 
6992   PetscFunctionBegin;
6993   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6994   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6995   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6996   PetscCall(MatCreate(comm, mat));
6997   PetscCall(MatSetSizes(*mat, m, n, M, N));
6998   PetscCall(MatSetType(*mat, MATMPIAIJ));
6999   maij = (Mat_MPIAIJ *)(*mat)->data;
7000 
7001   (*mat)->preallocated = PETSC_TRUE;
7002 
7003   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7004   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7005 
7006   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7007   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7008 
7009   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7010   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7011   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7012   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7013   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7014   PetscFunctionReturn(PETSC_SUCCESS);
7015 }
7016 
7017 typedef struct {
7018   Mat       *mp;    /* intermediate products */
7019   PetscBool *mptmp; /* is the intermediate product temporary ? */
7020   PetscInt   cp;    /* number of intermediate products */
7021 
7022   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7023   PetscInt    *startsj_s, *startsj_r;
7024   PetscScalar *bufa;
7025   Mat          P_oth;
7026 
7027   /* may take advantage of merging product->B */
7028   Mat Bloc; /* B-local by merging diag and off-diag */
7029 
7030   /* cusparse does not have support to split between symbolic and numeric phases.
7031      When api_user is true, we don't need to update the numerical values
7032      of the temporary storage */
7033   PetscBool reusesym;
7034 
7035   /* support for COO values insertion */
7036   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7037   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7038   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7039   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7040   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7041   PetscMemType mtype;
7042 
7043   /* customization */
7044   PetscBool abmerge;
7045   PetscBool P_oth_bind;
7046 } MatMatMPIAIJBACKEND;
7047 
7048 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7049 {
7050   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7051   PetscInt             i;
7052 
7053   PetscFunctionBegin;
7054   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7055   PetscCall(PetscFree(mmdata->bufa));
7056   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7057   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7058   PetscCall(MatDestroy(&mmdata->P_oth));
7059   PetscCall(MatDestroy(&mmdata->Bloc));
7060   PetscCall(PetscSFDestroy(&mmdata->sf));
7061   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7062   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7063   PetscCall(PetscFree(mmdata->own[0]));
7064   PetscCall(PetscFree(mmdata->own));
7065   PetscCall(PetscFree(mmdata->off[0]));
7066   PetscCall(PetscFree(mmdata->off));
7067   PetscCall(PetscFree(mmdata));
7068   PetscFunctionReturn(PETSC_SUCCESS);
7069 }
7070 
7071 /* Copy selected n entries with indices in idx[] of A to v[].
7072    If idx is NULL, copy the whole data array of A to v[]
7073  */
7074 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7075 {
7076   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7077 
7078   PetscFunctionBegin;
7079   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7080   if (f) {
7081     PetscCall((*f)(A, n, idx, v));
7082   } else {
7083     const PetscScalar *vv;
7084 
7085     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7086     if (n && idx) {
7087       PetscScalar    *w  = v;
7088       const PetscInt *oi = idx;
7089       PetscInt        j;
7090 
7091       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7092     } else {
7093       PetscCall(PetscArraycpy(v, vv, n));
7094     }
7095     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7096   }
7097   PetscFunctionReturn(PETSC_SUCCESS);
7098 }
7099 
7100 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7101 {
7102   MatMatMPIAIJBACKEND *mmdata;
7103   PetscInt             i, n_d, n_o;
7104 
7105   PetscFunctionBegin;
7106   MatCheckProduct(C, 1);
7107   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7108   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7109   if (!mmdata->reusesym) { /* update temporary matrices */
7110     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7111     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7112   }
7113   mmdata->reusesym = PETSC_FALSE;
7114 
7115   for (i = 0; i < mmdata->cp; i++) {
7116     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7117     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7118   }
7119   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7120     PetscInt noff;
7121 
7122     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7123     if (mmdata->mptmp[i]) continue;
7124     if (noff) {
7125       PetscInt nown;
7126 
7127       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7128       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7129       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7130       n_o += noff;
7131       n_d += nown;
7132     } else {
7133       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7134 
7135       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7136       n_d += mm->nz;
7137     }
7138   }
7139   if (mmdata->hasoffproc) { /* offprocess insertion */
7140     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7141     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7142   }
7143   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7144   PetscFunctionReturn(PETSC_SUCCESS);
7145 }
7146 
7147 /* Support for Pt * A, A * P, or Pt * A * P */
7148 #define MAX_NUMBER_INTERMEDIATE 4
7149 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7150 {
7151   Mat_Product           *product = C->product;
7152   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7153   Mat_MPIAIJ            *a, *p;
7154   MatMatMPIAIJBACKEND   *mmdata;
7155   ISLocalToGlobalMapping P_oth_l2g = NULL;
7156   IS                     glob      = NULL;
7157   const char            *prefix;
7158   char                   pprefix[256];
7159   const PetscInt        *globidx, *P_oth_idx;
7160   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7161   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7162   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7163                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7164                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7165   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7166 
7167   MatProductType ptype;
7168   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7169   PetscMPIInt    size;
7170 
7171   PetscFunctionBegin;
7172   MatCheckProduct(C, 1);
7173   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7174   ptype = product->type;
7175   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7176     ptype                                          = MATPRODUCT_AB;
7177     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7178   }
7179   switch (ptype) {
7180   case MATPRODUCT_AB:
7181     A          = product->A;
7182     P          = product->B;
7183     m          = A->rmap->n;
7184     n          = P->cmap->n;
7185     M          = A->rmap->N;
7186     N          = P->cmap->N;
7187     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7188     break;
7189   case MATPRODUCT_AtB:
7190     P          = product->A;
7191     A          = product->B;
7192     m          = P->cmap->n;
7193     n          = A->cmap->n;
7194     M          = P->cmap->N;
7195     N          = A->cmap->N;
7196     hasoffproc = PETSC_TRUE;
7197     break;
7198   case MATPRODUCT_PtAP:
7199     A          = product->A;
7200     P          = product->B;
7201     m          = P->cmap->n;
7202     n          = P->cmap->n;
7203     M          = P->cmap->N;
7204     N          = P->cmap->N;
7205     hasoffproc = PETSC_TRUE;
7206     break;
7207   default:
7208     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7209   }
7210   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7211   if (size == 1) hasoffproc = PETSC_FALSE;
7212 
7213   /* defaults */
7214   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7215     mp[i]    = NULL;
7216     mptmp[i] = PETSC_FALSE;
7217     rmapt[i] = -1;
7218     cmapt[i] = -1;
7219     rmapa[i] = NULL;
7220     cmapa[i] = NULL;
7221   }
7222 
7223   /* customization */
7224   PetscCall(PetscNew(&mmdata));
7225   mmdata->reusesym = product->api_user;
7226   if (ptype == MATPRODUCT_AB) {
7227     if (product->api_user) {
7228       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7229       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7230       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7231       PetscOptionsEnd();
7232     } else {
7233       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7234       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7235       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7236       PetscOptionsEnd();
7237     }
7238   } else if (ptype == MATPRODUCT_PtAP) {
7239     if (product->api_user) {
7240       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7241       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7242       PetscOptionsEnd();
7243     } else {
7244       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7245       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7246       PetscOptionsEnd();
7247     }
7248   }
7249   a = (Mat_MPIAIJ *)A->data;
7250   p = (Mat_MPIAIJ *)P->data;
7251   PetscCall(MatSetSizes(C, m, n, M, N));
7252   PetscCall(PetscLayoutSetUp(C->rmap));
7253   PetscCall(PetscLayoutSetUp(C->cmap));
7254   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7255   PetscCall(MatGetOptionsPrefix(C, &prefix));
7256 
7257   cp = 0;
7258   switch (ptype) {
7259   case MATPRODUCT_AB: /* A * P */
7260     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7261 
7262     /* A_diag * P_local (merged or not) */
7263     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7264       /* P is product->B */
7265       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7266       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7267       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7268       PetscCall(MatProductSetFill(mp[cp], product->fill));
7269       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7270       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7271       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7272       mp[cp]->product->api_user = product->api_user;
7273       PetscCall(MatProductSetFromOptions(mp[cp]));
7274       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7275       PetscCall(ISGetIndices(glob, &globidx));
7276       rmapt[cp] = 1;
7277       cmapt[cp] = 2;
7278       cmapa[cp] = globidx;
7279       mptmp[cp] = PETSC_FALSE;
7280       cp++;
7281     } else { /* A_diag * P_diag and A_diag * P_off */
7282       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       rmapt[cp] = 1;
7292       cmapt[cp] = 1;
7293       mptmp[cp] = PETSC_FALSE;
7294       cp++;
7295       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7296       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7297       PetscCall(MatProductSetFill(mp[cp], product->fill));
7298       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7299       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7300       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7301       mp[cp]->product->api_user = product->api_user;
7302       PetscCall(MatProductSetFromOptions(mp[cp]));
7303       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7304       rmapt[cp] = 1;
7305       cmapt[cp] = 2;
7306       cmapa[cp] = p->garray;
7307       mptmp[cp] = PETSC_FALSE;
7308       cp++;
7309     }
7310 
7311     /* A_off * P_other */
7312     if (mmdata->P_oth) {
7313       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7314       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7315       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7316       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7317       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7318       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7319       PetscCall(MatProductSetFill(mp[cp], product->fill));
7320       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7321       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7322       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7323       mp[cp]->product->api_user = product->api_user;
7324       PetscCall(MatProductSetFromOptions(mp[cp]));
7325       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7326       rmapt[cp] = 1;
7327       cmapt[cp] = 2;
7328       cmapa[cp] = P_oth_idx;
7329       mptmp[cp] = PETSC_FALSE;
7330       cp++;
7331     }
7332     break;
7333 
7334   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7335     /* A is product->B */
7336     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7337     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7338       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7339       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7340       PetscCall(MatProductSetFill(mp[cp], product->fill));
7341       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7342       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7343       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7344       mp[cp]->product->api_user = product->api_user;
7345       PetscCall(MatProductSetFromOptions(mp[cp]));
7346       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7347       PetscCall(ISGetIndices(glob, &globidx));
7348       rmapt[cp] = 2;
7349       rmapa[cp] = globidx;
7350       cmapt[cp] = 2;
7351       cmapa[cp] = globidx;
7352       mptmp[cp] = PETSC_FALSE;
7353       cp++;
7354     } else {
7355       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7356       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7357       PetscCall(MatProductSetFill(mp[cp], product->fill));
7358       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7359       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7360       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7361       mp[cp]->product->api_user = product->api_user;
7362       PetscCall(MatProductSetFromOptions(mp[cp]));
7363       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7364       PetscCall(ISGetIndices(glob, &globidx));
7365       rmapt[cp] = 1;
7366       cmapt[cp] = 2;
7367       cmapa[cp] = globidx;
7368       mptmp[cp] = PETSC_FALSE;
7369       cp++;
7370       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7371       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7372       PetscCall(MatProductSetFill(mp[cp], product->fill));
7373       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7374       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7375       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7376       mp[cp]->product->api_user = product->api_user;
7377       PetscCall(MatProductSetFromOptions(mp[cp]));
7378       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7379       rmapt[cp] = 2;
7380       rmapa[cp] = p->garray;
7381       cmapt[cp] = 2;
7382       cmapa[cp] = globidx;
7383       mptmp[cp] = PETSC_FALSE;
7384       cp++;
7385     }
7386     break;
7387   case MATPRODUCT_PtAP:
7388     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7389     /* P is product->B */
7390     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7391     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7392     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7393     PetscCall(MatProductSetFill(mp[cp], product->fill));
7394     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7395     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7396     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7397     mp[cp]->product->api_user = product->api_user;
7398     PetscCall(MatProductSetFromOptions(mp[cp]));
7399     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7400     PetscCall(ISGetIndices(glob, &globidx));
7401     rmapt[cp] = 2;
7402     rmapa[cp] = globidx;
7403     cmapt[cp] = 2;
7404     cmapa[cp] = globidx;
7405     mptmp[cp] = PETSC_FALSE;
7406     cp++;
7407     if (mmdata->P_oth) {
7408       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7409       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7410       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7411       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7412       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7413       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7414       PetscCall(MatProductSetFill(mp[cp], product->fill));
7415       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7416       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7417       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7418       mp[cp]->product->api_user = product->api_user;
7419       PetscCall(MatProductSetFromOptions(mp[cp]));
7420       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7421       mptmp[cp] = PETSC_TRUE;
7422       cp++;
7423       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7424       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7425       PetscCall(MatProductSetFill(mp[cp], product->fill));
7426       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7427       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7428       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7429       mp[cp]->product->api_user = product->api_user;
7430       PetscCall(MatProductSetFromOptions(mp[cp]));
7431       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7432       rmapt[cp] = 2;
7433       rmapa[cp] = globidx;
7434       cmapt[cp] = 2;
7435       cmapa[cp] = P_oth_idx;
7436       mptmp[cp] = PETSC_FALSE;
7437       cp++;
7438     }
7439     break;
7440   default:
7441     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7442   }
7443   /* sanity check */
7444   if (size > 1)
7445     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7446 
7447   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7448   for (i = 0; i < cp; i++) {
7449     mmdata->mp[i]    = mp[i];
7450     mmdata->mptmp[i] = mptmp[i];
7451   }
7452   mmdata->cp             = cp;
7453   C->product->data       = mmdata;
7454   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7455   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7456 
7457   /* memory type */
7458   mmdata->mtype = PETSC_MEMTYPE_HOST;
7459   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7460   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7461   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7462   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7463   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7464   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7465 
7466   /* prepare coo coordinates for values insertion */
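  /* Recall the row/column map types set in the switch above:
     rmapt[cp] = 1: rows of mp[cp] map to consecutive global rows starting at C->rmap->rstart
     rmapt[cp] = 2: rows map through the index array rmapa[cp]
     cmapt[cp] = 0: column indices are already global
     cmapt[cp] = 1: columns map to consecutive global columns starting at C->cmap->rstart
     cmapt[cp] = 2: columns map through the index array cmapa[cp] */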
7467 
7468   /* count the total nonzeros of the intermediate seqaij matrices
7469     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7470     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs for insertion
7471     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7472   */
7473   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7474     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7475     if (mptmp[cp]) continue;
7476     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7477       const PetscInt *rmap = rmapa[cp];
7478       const PetscInt  mr   = mp[cp]->rmap->n;
7479       const PetscInt  rs   = C->rmap->rstart;
7480       const PetscInt  re   = C->rmap->rend;
7481       const PetscInt *ii   = mm->i;
7482       for (i = 0; i < mr; i++) {
7483         const PetscInt gr = rmap[i];
7484         const PetscInt nz = ii[i + 1] - ii[i];
7485         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7486         else ncoo_oown += nz;                  /* this row is local */
7487       }
7488     } else ncoo_d += mm->nz;
7489   }
7490 
7491   /*
7492     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7493 
7494     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other procs.
7495 
7496     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7497 
7498     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7499     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7500     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7501 
7502     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7503     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of locally generated nonzeros, and the remaining part stores i of nonzeros this process will receive.
7504   */
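  /* An illustrative (hypothetical) example: if ncoo_d = 4, ncoo_oown = 2 and other processes insert ncoo2 = 3 entries here,
     then ncoo = 9; coo_i[0..5] holds the locally generated row indices and coo_i[6..8] is filled by the PetscSF gather
     below with the rows received from other processes (likewise for coo_j) */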
7505   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7506   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7507 
7508   /* gather (i,j) of nonzeros inserted by remote procs */
7509   if (hasoffproc) {
7510     PetscSF  msf;
7511     PetscInt ncoo2, *coo_i2, *coo_j2;
7512 
7513     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7514     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7515     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7516 
7517     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7518       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7519       PetscInt   *idxoff = mmdata->off[cp];
7520       PetscInt   *idxown = mmdata->own[cp];
7521       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7522         const PetscInt *rmap = rmapa[cp];
7523         const PetscInt *cmap = cmapa[cp];
7524         const PetscInt *ii   = mm->i;
7525         PetscInt       *coi  = coo_i + ncoo_o;
7526         PetscInt       *coj  = coo_j + ncoo_o;
7527         const PetscInt  mr   = mp[cp]->rmap->n;
7528         const PetscInt  rs   = C->rmap->rstart;
7529         const PetscInt  re   = C->rmap->rend;
7530         const PetscInt  cs   = C->cmap->rstart;
7531         for (i = 0; i < mr; i++) {
7532           const PetscInt *jj = mm->j + ii[i];
7533           const PetscInt  gr = rmap[i];
7534           const PetscInt  nz = ii[i + 1] - ii[i];
7535           if (gr < rs || gr >= re) { /* this is an offproc row */
7536             for (j = ii[i]; j < ii[i + 1]; j++) {
7537               *coi++    = gr;
7538               *idxoff++ = j;
7539             }
7540             if (!cmapt[cp]) { /* already global */
7541               for (j = 0; j < nz; j++) *coj++ = jj[j];
7542             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7543               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7544             } else { /* offdiag */
7545               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7546             }
7547             ncoo_o += nz;
7548           } else { /* this is a local row */
7549             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7550           }
7551         }
7552       }
7553       mmdata->off[cp + 1] = idxoff;
7554       mmdata->own[cp + 1] = idxown;
7555     }
7556 
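    /* Build an SF with one leaf per off-process entry, keyed by its global row stored in coo_i[], and with roots laid out
       by C's row layout. The root count of the associated multi-SF gives ncoo2, the number of entries this process will
       receive; the gathers then place the received (i,j) pairs at the tail of coo_i2[]/coo_j2[]. */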
7557     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7558     PetscInt incoo_o;
7559     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7560     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7561     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7562     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7563     ncoo = ncoo_d + ncoo_oown + ncoo2;
7564     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7565     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at the back */
7566     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7567     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7568     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7569     PetscCall(PetscFree2(coo_i, coo_j));
7570     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7571     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7572     coo_i = coo_i2;
7573     coo_j = coo_j2;
7574   } else { /* no offproc values insertion */
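    /* an (empty) SF is still created because mmdata->sf is used unconditionally below, e.g. by PetscSFMalloc() */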
7575     ncoo = ncoo_d;
7576     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7577 
7578     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7579     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7580     PetscCall(PetscSFSetUp(mmdata->sf));
7581   }
7582   mmdata->hasoffproc = hasoffproc;
7583 
7584   /* gather (i,j) of nonzeros inserted locally */
7585   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7586     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7587     PetscInt       *coi  = coo_i + ncoo_d;
7588     PetscInt       *coj  = coo_j + ncoo_d;
7589     const PetscInt *jj   = mm->j;
7590     const PetscInt *ii   = mm->i;
7591     const PetscInt *cmap = cmapa[cp];
7592     const PetscInt *rmap = rmapa[cp];
7593     const PetscInt  mr   = mp[cp]->rmap->n;
7594     const PetscInt  rs   = C->rmap->rstart;
7595     const PetscInt  re   = C->rmap->rend;
7596     const PetscInt  cs   = C->cmap->rstart;
7597 
7598     if (mptmp[cp]) continue;
7599     if (rmapt[cp] == 1) { /* consecutive rows */
7600       /* fill coo_i */
7601       for (i = 0; i < mr; i++) {
7602         const PetscInt gr = i + rs;
7603         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7604       }
7605       /* fill coo_j */
7606       if (!cmapt[cp]) { /* type-0, already global */
7607         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7608       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7609         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7610       } else {                                            /* type-2, local to global for sparse columns */
7611         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7612       }
7613       ncoo_d += mm->nz;
7614     } else if (rmapt[cp] == 2) { /* sparse rows */
7615       for (i = 0; i < mr; i++) {
7616         const PetscInt *jj = mm->j + ii[i];
7617         const PetscInt  gr = rmap[i];
7618         const PetscInt  nz = ii[i + 1] - ii[i];
7619         if (gr >= rs && gr < re) { /* local rows */
7620           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7621           if (!cmapt[cp]) { /* type-0, already global */
7622             for (j = 0; j < nz; j++) *coj++ = jj[j];
7623           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7624             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7625           } else { /* type-2, local to global for sparse columns */
7626             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7627           }
7628           ncoo_d += nz;
7629         }
7630       }
7631     }
7632   }
7633   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7634   PetscCall(ISDestroy(&glob));
7635   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7636   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7637   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7638   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7639 
7640   /* preallocate with COO data */
7641   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7642   PetscCall(PetscFree2(coo_i, coo_j));
7643   PetscFunctionReturn(PETSC_SUCCESS);
7644 }
7645 
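/* Hook up MatProductSymbolic_MPIAIJBACKEND for AB, AtB and PtAP. With device support enabled, this is done only when
   A and B have the same type and neither is bound to the CPU; a CPU fallback can also be requested with
   -matmatmult_backend_cpu, -mattransposematmult_backend_cpu, -matptap_backend_cpu (MatMatMult-style API) or
   -mat_product_algorithm_backend_cpu (MatProduct API). Otherwise the standard MPIAIJ routines are used. */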
7646 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7647 {
7648   Mat_Product *product = mat->product;
7649 #if defined(PETSC_HAVE_DEVICE)
7650   PetscBool match  = PETSC_FALSE;
7651   PetscBool usecpu = PETSC_FALSE;
7652 #else
7653   PetscBool match = PETSC_TRUE;
7654 #endif
7655 
7656   PetscFunctionBegin;
7657   MatCheckProduct(mat, 1);
7658 #if defined(PETSC_HAVE_DEVICE)
7659   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7660   if (match) { /* we can always fallback to the CPU if requested */
7661     switch (product->type) {
7662     case MATPRODUCT_AB:
7663       if (product->api_user) {
7664         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7665         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7666         PetscOptionsEnd();
7667       } else {
7668         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7669         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7670         PetscOptionsEnd();
7671       }
7672       break;
7673     case MATPRODUCT_AtB:
7674       if (product->api_user) {
7675         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7676         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7677         PetscOptionsEnd();
7678       } else {
7679         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7680         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7681         PetscOptionsEnd();
7682       }
7683       break;
7684     case MATPRODUCT_PtAP:
7685       if (product->api_user) {
7686         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7687         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7688         PetscOptionsEnd();
7689       } else {
7690         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7691         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7692         PetscOptionsEnd();
7693       }
7694       break;
7695     default:
7696       break;
7697     }
7698     match = (PetscBool)!usecpu;
7699   }
7700 #endif
7701   if (match) {
7702     switch (product->type) {
7703     case MATPRODUCT_AB:
7704     case MATPRODUCT_AtB:
7705     case MATPRODUCT_PtAP:
7706       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7707       break;
7708     default:
7709       break;
7710     }
7711   }
7712   /* fallback to MPIAIJ ops */
7713   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7714   PetscFunctionReturn(PETSC_SUCCESS);
7715 }
7716 
7717 /*
7718    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7719 
7720    n - the number of block indices in cc[]
7721    cc - the block indices (must be large enough to contain the indices)
7722 */
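/* For example, with bs = 2 and a row whose (sorted) column indices are {0, 1, 4, 5, 9}, the collapsed block indices
   are cc[] = {0, 2, 4} and *n = 3 */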
7723 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7724 {
7725   PetscInt        cnt = -1, nidx, j;
7726   const PetscInt *idx;
7727 
7728   PetscFunctionBegin;
7729   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7730   if (nidx) {
7731     cnt     = 0;
7732     cc[cnt] = idx[0] / bs;
7733     for (j = 1; j < nidx; j++) {
7734       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7735     }
7736   }
7737   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7738   *n = cnt + 1;
7739   PetscFunctionReturn(PETSC_SUCCESS);
7740 }
7741 
7742 /*
7743     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7744 
7745     ncollapsed - the number of block indices
7746     collapsed - the block indices (must be large enough to contain the indices)
7747 */
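/* The block indices of row `start` seed cprev; each subsequent row of the block is collapsed with MatCollapseRow()
   and merged into the running sorted union with PetscMergeIntArray(), swapping cprev and merged between iterations.
   w0, w1 and w2 are caller-provided work arrays. */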
7748 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7749 {
7750   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7751 
7752   PetscFunctionBegin;
7753   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7754   for (i = start + 1; i < start + bs; i++) {
7755     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7756     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7757     cprevtmp = cprev;
7758     cprev    = merged;
7759     merged   = cprevtmp;
7760   }
7761   *ncollapsed = nprev;
7762   if (collapsed) *collapsed = cprev;
7763   PetscFunctionReturn(PETSC_SUCCESS);
7764 }
7765 
7766 /*
7767  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7768 
7769  Input Parameters:
7770 + Amat - matrix
7771 . symmetrize - make the result symmetric
7772 . scale - symmetrically scale with the diagonal
 . filter - if nonnegative, filter out entries of the resulting graph smaller than this value
 . index_size - size of index[] (0 means use all rows/columns of each block)
 - index - rows/columns within each block to use when computing the scalar block value
7773 
7774  Output Parameter:
7775  . a_Gmat - output scalar graph >= 0
7776 
7777 */
7778 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7779 {
7780   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7781   MPI_Comm  comm;
7782   Mat       Gmat;
7783   PetscBool ismpiaij, isseqaij;
7784   Mat       a, b, c;
7785   MatType   jtype;
7786 
7787   PetscFunctionBegin;
7788   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7789   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7790   PetscCall(MatGetSize(Amat, &MM, &NN));
7791   PetscCall(MatGetBlockSize(Amat, &bs));
7792   nloc = (Iend - Istart) / bs;
7793 
7794   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7795   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7796   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7797 
7798   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7799   /* One solution would be to provide a new API, MatAIJGetCollapsedAIJ, which each class could implement
7800      efficiently */
7801   if (bs > 1) {
7802     PetscCall(MatGetType(Amat, &jtype));
7803     PetscCall(MatCreate(comm, &Gmat));
7804     PetscCall(MatSetType(Gmat, jtype));
7805     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7806     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
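    /* Fast path: requires that every nonzero block of Amat be a dense bs x bs block with the same column pattern in each
       of its bs rows; this is checked below and, if it fails, we fall back to the general path at old_bs */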
7807     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7808       PetscInt  *d_nnz, *o_nnz;
7809       MatScalar *aa, val, *AA;
7810       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7811 
7812       if (isseqaij) {
7813         a = Amat;
7814         b = NULL;
7815       } else {
7816         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7817         a             = d->A;
7818         b             = d->B;
7819       }
7820       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7821       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7822       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7823         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7824         const PetscInt *cols1, *cols2;
7825 
7826         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7827           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7828           nnz[brow / bs] = nc2 / bs;
7829           if (nc2 % bs) ok = 0;
7830           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7831           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7832             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7833             if (nc1 != nc2) ok = 0;
7834             else {
7835               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7836                 if (cols1[jj] != cols2[jj]) ok = 0;
7837                 if (cols1[jj] % bs != jj % bs) ok = 0;
7838               }
7839             }
7840             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7841           }
7842           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7843           if (!ok) {
7844             PetscCall(PetscFree2(d_nnz, o_nnz));
7845             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7846             goto old_bs;
7847           }
7848         }
7849       }
7850       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7851       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7852       PetscCall(PetscFree2(d_nnz, o_nnz));
7853       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
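      /* For each block row, AJ[c] is the global block-column index and AA[c] holds the sum of |Re(a_ij)| over the
         bs x bs block (or only over the rows/columns listed in index[] when index_size > 0) */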
7854       // diag
7855       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7856         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7857 
7858         ai = aseq->i;
7859         n  = ai[brow + 1] - ai[brow];
7860         aj = aseq->j + ai[brow];
7861         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7862           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7863           val        = 0;
7864           if (index_size == 0) {
7865             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7866               aa = aseq->a + ai[brow + ii] + k;
7867               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7868                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7869               }
7870             }
7871           } else {                                            // use (index,index) value if provided
7872             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7873               PetscInt ii = index[iii];
7874               aa          = aseq->a + ai[brow + ii] + k;
7875               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7876                 PetscInt jj = index[jjj];
7877                 val += PetscAbs(PetscRealPart(aa[jj]));
7878               }
7879             }
7880           }
7881           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7882           AA[k / bs] = val;
7883         }
7884         grow = Istart / bs + brow / bs;
7885         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7886       }
7887       // off-diag
7888       if (ismpiaij) {
7889         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7890         const PetscScalar *vals;
7891         const PetscInt    *cols, *garray = aij->garray;
7892 
7893         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7894         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7895           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7896           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7897             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7898             AA[k / bs] = 0;
7899             AJ[cidx]   = garray[cols[k]] / bs;
7900           }
7901           nc = ncols / bs;
7902           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7903           if (index_size == 0) {
7904             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7905               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7906               for (PetscInt k = 0; k < ncols; k += bs) {
7907                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7908                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7909                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7910                 }
7911               }
7912               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7913             }
7914           } else {                                            // use (index,index) value if provided
7915             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7916               PetscInt ii = index[iii];
7917               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7918               for (PetscInt k = 0; k < ncols; k += bs) {
7919                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7920                   PetscInt jj = index[jjj];
7921                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7922                 }
7923               }
7924               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7925             }
7926           }
7927           grow = Istart / bs + brow / bs;
7928           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7929         }
7930       }
7931       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7932       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7933       PetscCall(PetscFree2(AA, AJ));
7934     } else {
7935       const PetscScalar *vals;
7936       const PetscInt    *idx;
7937       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7938     old_bs:
7939       /*
7940        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7941        */
7942       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7943       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7944       if (isseqaij) {
7945         PetscInt max_d_nnz;
7946 
7947         /*
7948          Determine exact preallocation count for (sequential) scalar matrix
7949          */
7950         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7951         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7952         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7953         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7954         PetscCall(PetscFree3(w0, w1, w2));
7955       } else if (ismpiaij) {
7956         Mat             Daij, Oaij;
7957         const PetscInt *garray;
7958         PetscInt        max_d_nnz;
7959 
7960         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7961         /*
7962          Determine exact preallocation count for diagonal block portion of scalar matrix
7963          */
7964         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7965         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7966         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7967         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7968         PetscCall(PetscFree3(w0, w1, w2));
7969         /*
7970          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
7971          */
7972         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7973           o_nnz[jj] = 0;
7974           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7975             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7976             o_nnz[jj] += ncols;
7977             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7978           }
7979           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7980         }
7981       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7982       /* get scalar copy (norms) of matrix */
7983       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7984       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7985       PetscCall(PetscFree2(d_nnz, o_nnz));
7986       for (Ii = Istart; Ii < Iend; Ii++) {
7987         PetscInt dest_row = Ii / bs;
7988 
7989         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7990         for (jj = 0; jj < ncols; jj++) {
7991           PetscInt    dest_col = idx[jj] / bs;
7992           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7993 
7994           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7995         }
7996         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7997       }
7998       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7999       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8000     }
8001   } else {
8002     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8003     else {
8004       Gmat = Amat;
8005       PetscCall(PetscObjectReference((PetscObject)Gmat));
8006     }
8007     if (isseqaij) {
8008       a = Gmat;
8009       b = NULL;
8010     } else {
8011       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8012       a             = d->A;
8013       b             = d->B;
8014     }
8015     if (filter >= 0 || scale) {
8016       /* take absolute value of each entry */
8017       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8018         MatInfo      info;
8019         PetscScalar *avals;
8020 
8021         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8022         PetscCall(MatSeqAIJGetArray(c, &avals));
8023         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8024         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8025       }
8026     }
8027   }
8028   if (symmetrize) {
8029     PetscBool isset, issym;
8030 
8031     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8032     if (!isset || !issym) {
8033       Mat matTrans;
8034 
8035       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8036       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8037       PetscCall(MatDestroy(&matTrans));
8038     }
8039     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8040   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8041   if (scale) {
8042     /* symmetrically scale Gmat so that all diagonal values become +1 or -1 */
8043     Vec diag;
8044 
8045     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8046     PetscCall(MatGetDiagonal(Gmat, diag));
8047     PetscCall(VecReciprocal(diag));
8048     PetscCall(VecSqrtAbs(diag));
8049     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8050     PetscCall(VecDestroy(&diag));
8051   }
8052   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8053   if (filter >= 0) {
8054     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8055     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8056   }
8057   *a_Gmat = Gmat;
8058   PetscFunctionReturn(PETSC_SUCCESS);
8059 }
8060 
8061 /*
8062     Special version for direct calls from Fortran
8063 */
8064 
8065 /* Change these macros so they can be used in a void function */
8066 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8067 #undef PetscCall
8068 #define PetscCall(...) \
8069   do { \
8070     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8071     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8072       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8073       return; \
8074     } \
8075   } while (0)
8076 
8077 #undef SETERRQ
8078 #define SETERRQ(comm, ierr, ...) \
8079   do { \
8080     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8081     return; \
8082   } while (0)
8083 
8084 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8085   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8087   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8088 #else
8089 #endif
8090 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8091 {
8092   Mat         mat = *mmat;
8093   PetscInt    m = *mm, n = *mn;
8094   InsertMode  addv = *maddv;
8095   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8096   PetscScalar value;
8097 
8098   MatCheckPreallocated(mat, 1);
8099   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8100   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8101   {
8102     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8103     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8104     PetscBool roworiented = aij->roworiented;
8105 
8106     /* Some Variables required in the macro */
8107     Mat         A     = aij->A;
8108     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8109     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8110     MatScalar  *aa;
8111     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8112     Mat         B                 = aij->B;
8113     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8114     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8115     MatScalar  *ba;
8116     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8117      * cannot use "#if defined" inside a macro. */
8118     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8119 
8120     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8121     PetscInt   nonew = a->nonew;
8122     MatScalar *ap1, *ap2;
8123 
8124     PetscFunctionBegin;
8125     PetscCall(MatSeqAIJGetArray(A, &aa));
8126     PetscCall(MatSeqAIJGetArray(B, &ba));
8127     for (i = 0; i < m; i++) {
8128       if (im[i] < 0) continue;
8129       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8130       if (im[i] >= rstart && im[i] < rend) {
8131         row      = im[i] - rstart;
8132         lastcol1 = -1;
8133         rp1      = aj + ai[row];
8134         ap1      = aa + ai[row];
8135         rmax1    = aimax[row];
8136         nrow1    = ailen[row];
8137         low1     = 0;
8138         high1    = nrow1;
8139         lastcol2 = -1;
8140         rp2      = bj + bi[row];
8141         ap2      = ba + bi[row];
8142         rmax2    = bimax[row];
8143         nrow2    = bilen[row];
8144         low2     = 0;
8145         high2    = nrow2;
8146 
8147         for (j = 0; j < n; j++) {
8148           if (roworiented) value = v[i * n + j];
8149           else value = v[i + j * m];
8150           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8151           if (in[j] >= cstart && in[j] < cend) {
8152             col = in[j] - cstart;
8153             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8154           } else if (in[j] < 0) continue;
8155           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8156             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8157           } else {
8158             if (mat->was_assembled) {
8159               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8160 #if defined(PETSC_USE_CTABLE)
8161               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8162               col--;
8163 #else
8164               col = aij->colmap[in[j]] - 1;
8165 #endif
8166               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8167                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8168                 col = in[j];
8169                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8170                 B        = aij->B;
8171                 b        = (Mat_SeqAIJ *)B->data;
8172                 bimax    = b->imax;
8173                 bi       = b->i;
8174                 bilen    = b->ilen;
8175                 bj       = b->j;
8176                 rp2      = bj + bi[row];
8177                 ap2      = ba + bi[row];
8178                 rmax2    = bimax[row];
8179                 nrow2    = bilen[row];
8180                 low2     = 0;
8181                 high2    = nrow2;
8182                 bm       = aij->B->rmap->n;
8183                 ba       = b->a;
8184                 inserted = PETSC_FALSE;
8185               }
8186             } else col = in[j];
8187             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8188           }
8189         }
8190       } else if (!aij->donotstash) {
8191         if (roworiented) {
8192           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8193         } else {
8194           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8195         }
8196       }
8197     }
8198     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8199     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8200   }
8201   PetscFunctionReturnVoid();
8202 }
8203 
8204 /* Undefining these here since they were redefined from their original definition above! No
8205  * other PETSc functions should be defined past this point, as it is impossible to recover the
8206  * original definitions */
8207 #undef PetscCall
8208 #undef SETERRQ
8209